framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,1,128,1,fp8,fp8,0,23.29273223876953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,fp8,0,23.374777221679686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,fp8,0,23.407289123535158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,2,128,1,fp8,fp8,0,23.579844665527343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,fp8,0,23.528463745117186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,4,128,1,fp8,fp8,0,23.782124328613282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,float16,0,32.758236694335935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,float16,0,31.89466247558594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,fp8,0,23.867344665527344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,8,128,1,fp8,fp8,0,23.472088623046876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,fp8,0,12.425838470458984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,float16,0,31.894989013671875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,96,128,1,fp8,fp8,0,12.639185333251953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,float16,0,15.9940185546875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,fp8,0,11.860606384277343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,1,128,1,fp8,fp8,0,11.734880065917968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,float16,0,16.43920440673828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,fp8,0,11.791667175292968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,float16,0,32.3439208984375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,2,128,1,fp8,fp8,0,11.739761352539062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,float16,0,16.423359680175782
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,4,128,1,fp8,fp8,0,11.805977630615235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,fp8,0,11.857855987548827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,float16,0,16.136170959472658
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,fp8,0,11.973324584960938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,8,128,1,fp8,fp8,0,11.842033386230469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,fp8,0,6.303254318237305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,96,128,1,fp8,fp8,0,6.265958404541015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,float16,0,7.89163818359375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,fp8,0,5.919343948364258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,1,128,1,fp8,fp8,0,5.939199829101563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,float16,0,8.128612518310547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,fp8,0,5.954150390625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,2,128,1,fp8,fp8,0,5.942180633544922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,float16,0,7.90930404663086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,fp8,0,5.919385528564453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,4,128,1,fp8,fp8,0,5.951036834716797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,float16,0,8.06237564086914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,fp8,0,5.912899017333984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,8,128,1,fp8,fp8,0,5.9306385040283205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,fp8,0,3.1502239227294924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,96,128,1,fp8,fp8,0,3.161004829406738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,float16,0,3.7543918609619142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,fp8,0,3.003160095214844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,1,128,1,fp8,fp8,0,2.9317087173461913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,float16,0,3.8951934814453124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,fp8,0,3.079654312133789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,2,128,1,fp8,fp8,0,2.956222343444824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,float16,0,3.7688014984130858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,fp8,0,3.0262304306030274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,4,128,1,fp8,fp8,0,3.012656021118164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,float16,0,3.8834800720214844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,fp8,0,3.2783409118652345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,8,128,1,fp8,fp8,0,2.9858928680419923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,float16,0,18.725634765625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,float16,0,3.8557903289794924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,float16,0,8.153596496582031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,float16,0,17.14167175292969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,fp8,0,13.813729858398437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,1,128,1,fp8,fp8,0,13.528871154785156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,float16,0,18.125344848632814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,fp8,0,13.885081481933593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,2,128,1,fp8,fp8,0,13.849504089355468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,float16,0,19.079647827148438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,fp8,0,13.709249877929688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,4,128,1,fp8,fp8,0,13.966510009765624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,float16,0,19.0417724609375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,fp8,0,13.684165954589844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,float16,0,10.304708862304688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,96,8,128,1,fp8,fp8,0,13.849758911132813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,fp8,0,7.423014068603516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,96,128,1,fp8,fp8,0,7.423579406738281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,fp8,0,6.891529846191406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,float16,0,9.601766204833984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,1,128,1,fp8,fp8,0,6.836924743652344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,float16,0,9.49701156616211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,fp8,0,6.906076812744141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,2,128,1,fp8,fp8,0,7.027891540527344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,float16,0,9.261116790771485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,fp8,0,6.876255798339844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,4,128,1,fp8,fp8,0,6.925782775878906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,float16,0,9.386500549316406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,fp8,0,7.006295776367187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,float16,0,4.858012771606445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,96,8,128,1,fp8,fp8,0,6.8849952697753904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,fp8,0,3.913422393798828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,96,128,1,fp8,fp8,0,3.7662750244140626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,fp8,0,3.4405776977539064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,float16,0,4.4316143035888675
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,1,128,1,fp8,fp8,0,3.437104034423828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,fp8,0,3.450105667114258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,float16,0,4.492724609375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,2,128,1,fp8,fp8,0,3.7299598693847655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,float16,0,4.508158493041992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,fp8,0,3.4941024780273438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,4,128,1,fp8,fp8,0,3.516164779663086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,float16,0,4.3208671569824215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,float16,0,2.3383152008056642
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,fp8,0,3.6766078948974608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,96,8,128,1,fp8,fp8,0,3.4838863372802735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,fp8,0,1.9931583404541016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,96,128,1,fp8,fp8,0,2.049625587463379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,float16,0,2.177796745300293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,fp8,0,1.7475072860717773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,1,128,1,fp8,fp8,0,1.7304224014282226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,fp8,0,1.746072006225586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,float16,0,2.3313568115234373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,2,128,1,fp8,fp8,0,1.7291439056396485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,float16,0,2.0838512420654296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,fp8,0,1.926371192932129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,4,128,1,fp8,fp8,0,1.729047966003418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,float16,0,2.206772804260254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,fp8,0,1.7632816314697266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,96,8,128,1,fp8,fp8,0,1.7655183792114257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,fp8,0,9.95457763671875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,1,128,1,fp8,fp8,0,9.630430603027344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,float16,0,13.082369995117187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,float16,0,13.075535583496094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,2,128,1,fp8,fp8,0,9.677227020263672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,fp8,0,9.932958221435547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,fp8,0,9.843507385253906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,float16,0,12.972122192382812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,4,128,1,fp8,fp8,0,9.996710205078125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,fp8,0,9.844940948486329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,float16,0,13.90435333251953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,96,8,128,1,fp8,fp8,0,9.719009399414062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,float16,0,6.9787345886230465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,fp8,0,5.365358352661133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,96,128,1,fp8,fp8,0,5.429145431518554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,float16,0,6.461124420166016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,fp8,0,5.076473617553711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,1,128,1,fp8,fp8,0,4.940812683105468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,float16,0,6.537657928466797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,2,128,1,fp8,fp8,0,4.870686340332031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,fp8,0,4.8895103454589846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,float16,0,6.578372955322266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,fp8,0,4.967655944824219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,4,128,1,fp8,fp8,0,4.876689529418945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,float16,0,6.453825378417969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,float16,0,3.340105438232422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,8,128,1,fp8,fp8,0,4.985595321655273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,fp8,0,2.884377670288086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,96,128,1,fp8,fp8,0,2.888991928100586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,fp8,0,5.08503532409668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,float16,0,3.1251152038574217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,fp8,0,2.4269023895263673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,1,128,1,fp8,fp8,0,2.4310943603515627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,2,128,1,fp8,fp8,0,2.4406272888183596
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,fp8,0,2.712696075439453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,float16,0,3.000868797302246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,float16,0,2.952672004699707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,fp8,0,2.4633152008056642
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,4,128,1,fp8,fp8,0,2.570699119567871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,float16,0,3.092395210266113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,float16,0,1.7333456039428712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,fp8,0,2.6267967224121094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,96,8,128,1,fp8,fp8,0,2.4683631896972655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,fp8,0,1.6000864028930664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,96,128,1,fp8,fp8,0,1.404327964782715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,fp8,0,1.2745280265808105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,float16,0,1.480996799468994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,fp8,0,1.2479215621948243
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,2,128,1,fp8,fp8,0,1.3454943656921388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,float16,0,1.5226431846618653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,fp8,0,1.2450112342834472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,4,128,1,fp8,fp8,0,1.4036623954772949
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,1,128,1,fp8,fp8,0,1.2539199829101562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,fp8,0,1.2535231590270997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,float16,0,1.4932448387145996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,8,128,1,fp8,fp8,0,1.2459407806396485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,float16,0,1.487001609802246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,fp8,0,12.830889892578124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,1,128,1,fp8,fp8,0,12.8941162109375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,float16,0,17.3877685546875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,float16,0,16.530177307128906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,fp8,0,13.021530151367188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,2,128,1,fp8,fp8,0,13.022523498535156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,fp8,0,13.000062561035156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,float16,0,18.029023742675783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,4,128,1,fp8,fp8,0,13.054902648925781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,float16,0,17.677630615234374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,fp8,0,12.97094268798828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,96,8,128,1,fp8,fp8,0,13.077565002441407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,float16,0,9.401753234863282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,fp8,0,7.229891204833985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,96,128,1,fp8,fp8,0,7.233956909179687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,fp8,0,6.402164459228516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,float16,0,8.675520324707032
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,1,128,1,fp8,fp8,0,6.457628631591797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,float16,0,8.569737243652344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,fp8,0,6.469087982177735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,2,128,1,fp8,fp8,0,6.579974365234375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,float16,0,8.56525115966797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,fp8,0,6.488438415527344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,4,128,1,fp8,fp8,0,6.476200103759766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,float16,0,8.988116455078124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,fp8,0,6.552308654785156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,float16,0,4.610126495361328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,96,8,128,1,fp8,fp8,0,6.496695709228516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,fp8,0,3.7562110900878904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,fp8,0,3.2197025299072264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,96,128,1,fp8,fp8,0,3.6448177337646483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,float16,0,4.257904052734375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,1,128,1,fp8,fp8,0,3.235006332397461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,float16,0,4.019604873657227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,fp8,0,3.277212905883789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,2,128,1,fp8,fp8,0,3.209600067138672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,fp8,0,3.2505489349365235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,float16,0,4.155913543701172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,4,128,1,fp8,fp8,0,3.217483139038086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,float16,0,4.069662475585938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,fp8,0,3.2526527404785157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,float16,0,2.325276756286621
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,96,8,128,1,fp8,fp8,0,3.3720256805419924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,96,128,1,fp8,fp8,0,1.8757568359375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,fp8,0,2.0634384155273438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,fp8,0,1.6304960250854492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,float16,0,2.0523199081420898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,1,128,1,fp8,fp8,0,1.625534439086914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,float16,0,1.9139007568359374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,fp8,0,1.6374528884887696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,2,128,1,fp8,fp8,0,1.6975248336791993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,float16,0,1.9298383712768554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,fp8,0,1.8258991241455078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,4,128,1,fp8,fp8,0,1.6371871948242187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,float16,0,2.014507293701172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,float16,0,1.1746047973632812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,fp8,0,1.7572639465332032
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,96,8,128,1,fp8,fp8,0,1.6863391876220704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,fp8,0,1.10894718170166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,fp8,0,0.8382111549377441
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,96,128,1,fp8,fp8,0,0.9749216079711914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,float16,0,1.0918463706970214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,1,128,1,fp8,fp8,0,0.8389663696289062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,fp8,0,0.8545472145080566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,2,128,1,fp8,fp8,0,0.9436191558837891
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,float16,0,1.0174960136413573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,fp8,0,0.8394335746765137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,4,128,1,fp8,fp8,0,0.9340640068054199
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,fp8,0,0.8399888038635254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,float16,0,1.0298239707946777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,8,128,1,fp8,fp8,0,0.8393888473510742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,float16,0,0.9859040260314942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,1,128,1,fp8,fp8,0,7.612322998046875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,float16,0,9.827037048339843
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,float16,0,9.328878021240234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,fp8,0,7.731241607666016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,fp8,0,7.637345886230468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,2,128,1,fp8,fp8,0,7.661564636230469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,fp8,0,7.70452651977539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,float16,0,9.605022430419922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,4,128,1,fp8,fp8,0,7.691458892822266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,fp8,0,7.725302124023438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,float16,0,10.301198577880859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,96,8,128,1,fp8,fp8,0,7.8189247131347654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,float16,0,5.662630462646485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,fp8,0,4.440547180175781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,96,128,1,fp8,fp8,0,4.440908813476563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,float16,0,4.537740707397461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,fp8,0,3.8990463256835937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,1,128,1,fp8,fp8,0,3.8458942413330077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,float16,0,4.847265625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,2,128,1,fp8,fp8,0,3.822305679321289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,fp8,0,3.9724735260009765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,float16,0,5.0928913116455075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,4,128,1,fp8,fp8,0,3.8192447662353515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,fp8,0,3.8667774200439453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,float16,0,4.822214508056641
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,float16,0,2.7862239837646485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,fp8,0,2.287607955932617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,8,128,1,fp8,fp8,0,3.8101329803466797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,fp8,0,4.240230560302734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,96,128,1,fp8,fp8,0,2.305196762084961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,float16,0,2.257756805419922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,fp8,0,1.9223567962646484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,1,128,1,fp8,fp8,0,1.932979202270508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,fp8,0,2.0597408294677733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,float16,0,2.266988754272461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,float16,0,2.336950492858887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,fp8,0,1.9308752059936523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,4,128,1,fp8,fp8,0,2.000611114501953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,float16,0,2.384320068359375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,2,128,1,fp8,fp8,0,1.928473663330078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,fp8,0,1.9267887115478515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,96,8,128,1,fp8,fp8,0,1.924336051940918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,float16,0,1.504475212097168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,fp8,0,1.1374496459960937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,96,128,1,fp8,fp8,0,1.1384927749633789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,float16,0,1.1495792388916015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,fp8,0,1.0692735671997071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,1,128,1,fp8,fp8,0,0.984670352935791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,float16,0,1.1377087593078614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,2,128,1,fp8,fp8,0,0.9922335624694825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,float16,0,1.141528034210205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,fp8,0,0.9962448120117188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,4,128,1,fp8,fp8,0,0.9831472396850586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,float16,0,1.170964813232422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,fp8,0,1.0008319854736327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,8,128,1,fp8,fp8,0,0.9865200042724609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,float16,0,0.7387263774871826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,fp8,0,0.5912864208221436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,96,128,1,fp8,fp8,0,0.5920144081115722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,float16,0,0.5836880207061768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,fp8,0,1.1083104133605957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,fp8,0,0.5119071960449219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,1,128,1,fp8,fp8,0,0.5117551803588867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,float16,0,0.5913167953491211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,fp8,0,0.5108143806457519
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,2,128,1,fp8,fp8,0,0.5118847846984863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,fp8,0,0.5114208221435547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,4,128,1,fp8,fp8,0,0.5115744113922119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,float16,0,0.6071119785308838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,fp8,0,0.5121295928955079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,8,128,1,fp8,fp8,0,0.5179599761962891
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,float16,0,0.5986976146697998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,fp8,0,7.502193450927734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,float16,0,9.014667510986328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,1,128,1,fp8,fp8,0,7.455707550048828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,float16,0,9.017881774902344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,fp8,0,7.543099212646484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,2,128,1,fp8,fp8,0,7.4639137268066404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,float16,0,9.215926361083984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,fp8,0,7.504863739013672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,4,128,1,fp8,fp8,0,7.583392333984375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,float16,0,9.501585388183594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,fp8,0,7.478790283203125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,96,8,128,1,fp8,fp8,0,7.489891052246094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,float16,0,5.844992065429688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,fp8,0,4.5780174255371096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,96,128,1,fp8,fp8,0,4.580956649780274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,float16,0,4.392406463623047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,fp8,0,3.7569648742675783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,1,128,1,fp8,fp8,0,3.7399215698242188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,float16,0,4.46325912475586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,fp8,0,3.761910247802734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,2,128,1,fp8,fp8,0,3.7668704986572266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,float16,0,4.362611389160156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,fp8,0,3.83392333984375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,4,128,1,fp8,fp8,0,3.740631866455078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,float16,0,4.702076721191406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,fp8,0,3.8645519256591796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,float16,0,2.802574348449707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,96,8,128,1,fp8,fp8,0,3.754060745239258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,fp8,0,2.4397119522094726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,96,128,1,fp8,fp8,0,2.300943946838379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,float16,0,2.1566511154174806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,fp8,0,1.8851663589477539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,1,128,1,fp8,fp8,0,1.8951423645019532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,float16,0,2.1712751388549805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,fp8,0,2.1064479827880858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,2,128,1,fp8,fp8,0,1.8841680526733398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,float16,0,2.2123632431030273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,4,128,1,fp8,fp8,0,1.8867168426513672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,float16,0,2.205326461791992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,fp8,0,2.104747200012207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,8,128,1,fp8,fp8,0,1.8973680496215821
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,fp8,0,1.173748779296875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,float16,0,1.5113247871398925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,float16,0,1.1074399948120117
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,fp8,0,1.0280431747436523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,96,128,1,fp8,fp8,0,1.167073631286621
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,fp8,0,1.940852737426758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,1,128,1,fp8,fp8,0,0.9589632034301758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,fp8,0,0.9583696365356446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,float16,0,1.0915120124816895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,2,128,1,fp8,fp8,0,0.9577103614807129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,fp8,0,0.9589839935302734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,float16,0,1.114457607269287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,4,128,1,fp8,fp8,0,0.9756128311157226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,float16,0,1.09791841506958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,float16,0,0.7332015991210937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,fp8,0,0.9601072311401367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,fp8,0,0.6008431911468506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,96,128,1,fp8,fp8,0,0.60033278465271
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,fp8,0,0.4954048156738281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,float16,0,0.5553872108459472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,1,128,1,fp8,fp8,0,0.4955552101135254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,float16,0,0.5542287826538086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,fp8,0,0.4951920032501221
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,2,128,1,fp8,fp8,0,0.49634242057800293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,float16,0,0.5632512092590332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,4,128,1,fp8,fp8,0,0.5001967906951904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,float16,0,0.5694223880767822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,fp8,0,0.49645280838012695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,8,128,1,fp8,fp8,0,0.514188814163208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,float16,0,0.3849776029586792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,fp8,0,0.32968959808349607
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,96,8,128,1,fp8,fp8,0,0.9743311882019043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,96,128,1,fp8,fp8,0,0.31813440322875974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,fp8,0,0.26438241004943847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,1,128,1,fp8,fp8,0,0.26350400447845457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,float16,0,0.3002608060836792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,fp8,0,0.2634079933166504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,2,128,1,fp8,fp8,0,0.26438078880310056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,fp8,0,0.2644959926605225
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,4,128,1,fp8,fp8,0,0.26446559429168703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,float16,0,0.3040704011917114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,fp8,0,0.2645375967025757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,8,128,1,fp8,fp8,0,0.2648736000061035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,float16,0,0.2981631994247437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,float16,0,0.29644639492034913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,fp8,0,0.49554882049560545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,float16,0,5.497895812988281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,fp8,0,4.611337661743164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,1,128,1,fp8,fp8,0,4.600732803344727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,float16,0,5.483729553222656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,fp8,0,4.609403228759765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,2,128,1,fp8,fp8,0,4.6034400939941404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,float16,0,5.317001724243164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,fp8,0,4.618217468261719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,4,128,1,fp8,fp8,0,4.610710525512696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,float16,0,5.483035278320313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,fp8,0,4.637230300903321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,float16,0,3.5177440643310547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,96,8,128,1,fp8,fp8,0,4.621670532226562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,96,128,1,fp8,fp8,0,2.9310096740722655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,fp8,0,3.026483154296875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,float16,0,2.6475263595581056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,fp8,0,2.323257637023926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,1,128,1,fp8,fp8,0,2.311795234680176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,float16,0,2.568849563598633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,fp8,0,2.4241247177124023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,2,128,1,fp8,fp8,0,2.315795135498047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,float16,0,2.58813591003418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,fp8,0,2.4407936096191407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,4,128,1,fp8,fp8,0,2.319503974914551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,8,128,1,fp8,fp8,0,2.31878719329834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,float16,0,2.743961524963379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,fp8,0,2.537246322631836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,float16,0,1.7650991439819337
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,float16,0,1.2887264251708985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,fp8,0,1.4793919563293456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,96,128,1,fp8,fp8,0,1.520796775817871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,fp8,0,1.1878031730651855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,1,128,1,fp8,fp8,0,1.1764528274536132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,fp8,0,1.173040008544922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,float16,0,1.2927311897277831
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,2,128,1,fp8,fp8,0,1.170193576812744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,float16,0,1.3001296043395996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,fp8,0,1.1725343704223632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,4,128,1,fp8,fp8,0,1.1704992294311523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,float16,0,1.3399456024169922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,float16,0,0.8929984092712402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,fp8,0,1.1943887710571288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,96,8,128,1,fp8,fp8,0,1.1744272232055664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,fp8,0,0.8145744323730468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,float16,0,0.6739312171936035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,fp8,0,0.5992447853088378
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,96,128,1,fp8,fp8,0,0.754088020324707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,1,128,1,fp8,fp8,0,0.6432735919952393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,fp8,0,0.5997360229492188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,float16,0,0.6577951908111572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,float16,0,0.6697231769561768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,4,128,1,fp8,fp8,0,0.5988768100738525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,fp8,0,0.5994256019592286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,float16,0,0.6869647979736329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,8,128,1,fp8,fp8,0,0.6002895832061768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,fp8,0,0.39219839572906495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,float16,0,0.47301440238952636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,96,128,1,fp8,fp8,0,0.39296319484710696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,fp8,0,0.313372802734375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,1,128,1,fp8,fp8,0,0.31335840225219724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,2,128,1,fp8,fp8,0,0.5983024120330811
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,float16,0,0.35010080337524413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,fp8,0,0.31290080547332766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,2,128,1,fp8,fp8,0,0.3134768009185791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,float16,0,0.34575839042663575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,fp8,0,0.31361439228057864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,4,128,1,fp8,fp8,0,0.3126271963119507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,fp8,0,0.5994351863861084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,float16,0,0.35678560733795167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,fp8,0,0.31322240829467773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,8,128,1,fp8,fp8,0,0.3135711908340454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,float16,0,0.2485743999481201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,fp8,0,0.21070399284362792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,96,128,1,fp8,fp8,0,0.2106544017791748
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,float16,0,0.18919359445571898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,fp8,0,0.16944799423217774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,1,128,1,fp8,fp8,0,0.16879839897155763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,float16,0,0.18967519998550414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,float16,0,0.34950718879699705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,fp8,0,0.16984479427337645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,2,128,1,fp8,fp8,0,0.16862239837646484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,float16,0,0.1903488039970398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,fp8,0,0.16894880533218384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,4,128,1,fp8,fp8,0,0.16932480335235595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,float16,0,0.18865599632263183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,fp8,0,0.1705024003982544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,96,8,128,1,fp8,fp8,0,0.16951199769973754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,fp8,0,4.789238357543946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,1,128,1,fp8,fp8,0,4.795603179931641
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,float16,0,5.309692764282227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,float16,0,5.392345428466797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,fp8,0,4.78843994140625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,2,128,1,fp8,fp8,0,4.797351837158203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,fp8,0,4.825022506713867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,float16,0,5.461305618286133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,4,128,1,fp8,fp8,0,4.790902328491211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,fp8,0,4.802534484863282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,float16,0,5.61429443359375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,96,8,128,1,fp8,fp8,0,4.803209686279297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,float16,0,3.7343006134033203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,fp8,0,3.2138320922851564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,96,128,1,fp8,fp8,0,3.2104782104492187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,float16,0,2.6383712768554686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,fp8,0,2.407236862182617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,1,128,1,fp8,fp8,0,2.405147171020508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,float16,0,2.7185712814331056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,fp8,0,2.408008003234863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,2,128,1,fp8,fp8,0,2.405308723449707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,float16,0,2.657419204711914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,fp8,0,2.531510353088379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,4,128,1,fp8,fp8,0,2.4090848922729493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,float16,0,2.6906991958618165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,fp8,0,2.505201530456543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,float16,0,1.8849824905395507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,fp8,0,1.7048000335693358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,96,8,128,1,fp8,fp8,0,2.4104175567626953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,96,128,1,fp8,fp8,0,1.6226272583007812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,float16,0,1.3175392150878906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,fp8,0,1.2135328292846679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,1,128,1,fp8,fp8,0,1.213710403442383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,float16,0,1.322761631011963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,fp8,0,1.2284416198730468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,2,128,1,fp8,fp8,0,1.2131008148193358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,float16,0,1.3105279922485351
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,fp8,0,1.2177295684814453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,4,128,1,fp8,fp8,0,1.214566421508789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,float16,0,1.342404842376709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,fp8,0,1.2741600036621095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,96,8,128,1,fp8,fp8,0,1.2174063682556153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,float16,0,0.9553088188171387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,fp8,0,0.8606207847595215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,96,128,1,fp8,fp8,0,0.8212271690368652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,float16,0,0.671235179901123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,fp8,0,0.6181359767913819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,1,128,1,fp8,fp8,0,0.6505296230316162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,fp8,0,0.6193327903747559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,2,128,1,fp8,fp8,0,0.6182159900665283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,float16,0,0.6673168182373047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,fp8,0,0.6187808036804199
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,4,128,1,fp8,fp8,0,0.6186448097229004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,float16,0,0.6836880207061767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,fp8,0,0.6200992107391358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,8,128,1,fp8,fp8,0,0.619216012954712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,fp8,0,0.4223360061645508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,float16,0,0.4937615871429443
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,96,128,1,fp8,fp8,0,0.4223616123199463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,fp8,0,0.32541599273681643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,1,128,1,fp8,fp8,0,0.3197216033935547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,float16,0,0.3466543912887573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,fp8,0,0.32007360458374023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,2,128,1,fp8,fp8,0,0.32498879432678224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,float16,0,0.659552001953125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,float16,0,0.34679839611053465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,fp8,0,0.3199887990951538
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,4,128,1,fp8,fp8,0,0.31987199783325193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,float16,0,0.3536639928817749
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,fp8,0,0.320196795463562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,8,128,1,fp8,fp8,0,0.3201600074768066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,float16,0,0.25867519378662107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,fp8,0,0.22265760898590087
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,96,128,1,fp8,fp8,0,0.2232192039489746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,float16,0,0.18326079845428467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,fp8,0,0.17047359943389892
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,1,128,1,fp8,fp8,0,0.17035839557647706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,float16,0,0.18427040576934814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,fp8,0,0.1704527974128723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,2,128,1,fp8,fp8,0,0.17027679681777955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,float16,0,0.18557440042495726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,fp8,0,0.17080960273742676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,4,128,1,fp8,fp8,0,0.1703279972076416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,float16,0,0.18824319839477538
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,fp8,0,0.17056959867477417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,96,8,128,1,fp8,fp8,0,0.17040959596633912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,float16,0,0.14401600360870362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,fp8,0,0.12311999797821045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,96,128,1,fp8,fp8,0,0.12294880151748658
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,float16,0,0.101528000831604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,fp8,0,0.09463679790496826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,1,128,1,fp8,fp8,0,0.0944927990436554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,fp8,0,0.09453759789466858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,2,128,1,fp8,fp8,0,0.09444000124931336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,float16,0,0.10277760028839111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,fp8,0,0.09442239999771118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,4,128,1,fp8,fp8,0,0.09480159878730773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,float16,0,0.10413440465927123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,fp8,0,0.09473279714584351
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,8,128,1,fp8,fp8,0,0.09468960165977477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,float16,0,0.3471168041229248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,float16,0,0.10215200185775757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,fp8,0,3.0934703826904295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,float16,0,3.323072052001953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,1,128,1,fp8,fp8,0,3.0976703643798826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,float16,0,3.3246959686279296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,fp8,0,3.0929424285888674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,2,128,1,fp8,fp8,0,3.106500816345215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,float16,0,3.4085662841796873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,4,128,1,fp8,fp8,0,3.0943023681640627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,float16,0,3.455587387084961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,fp8,0,3.1745584487915037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,8,128,1,fp8,fp8,0,3.0981424331665037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,fp8,0,3.1148815155029297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,float16,0,2.4817167282104493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,fp8,0,2.157222366333008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,96,128,1,fp8,fp8,0,2.156915283203125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,fp8,0,1.5556816101074218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,float16,0,1.6499231338500977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,1,128,1,fp8,fp8,0,1.5573295593261718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,float16,0,1.6329391479492188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,fp8,0,1.5556303977966308
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,2,128,1,fp8,fp8,0,1.5871952056884766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,float16,0,1.6689855575561523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,fp8,0,1.5593999862670898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,4,128,1,fp8,fp8,0,1.5568943977355958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,float16,0,1.7250352859497071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,fp8,0,1.6074256896972656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,float16,0,1.2439760208129882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,fp8,0,1.1009712219238281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,96,128,1,fp8,fp8,0,1.0888544082641602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,96,8,128,1,fp8,fp8,0,1.558955192565918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,float16,0,0.8261471748352051
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,fp8,0,0.7888832092285156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,1,128,1,fp8,fp8,0,0.7881199836730957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,float16,0,0.827883243560791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,fp8,0,0.8108608245849609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,float16,0,0.8408559799194336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,fp8,0,0.7882160186767578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,4,128,1,fp8,fp8,0,0.8112272262573242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,float16,0,0.8601951599121094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,8,128,1,fp8,fp8,0,0.7898528099060058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,float16,0,0.6373760223388671
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,fp8,0,0.5548223972320556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,96,128,1,fp8,fp8,0,0.554915189743042
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,float16,0,0.4243055820465088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,2,128,1,fp8,fp8,0,0.7893775939941406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,float16,0,0.4284736156463623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,fp8,0,0.4038127899169922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,fp8,0,0.7895919799804687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,2,128,1,fp8,fp8,0,0.4039167881011963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,float16,0,0.4284095764160156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,fp8,0,0.4046656131744385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,4,128,1,fp8,fp8,0,0.4035632133483887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,float16,0,0.44443202018737793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,fp8,0,0.4041776180267334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,fp8,0,0.4035664081573486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,float16,0,0.32865281105041505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,8,128,1,fp8,fp8,0,0.40561761856079104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,fp8,0,0.2872864007949829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,float16,0,0.22577440738677979
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,96,128,1,fp8,fp8,0,0.28748159408569335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,fp8,0,0.21131680011749268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,float16,0,0.2237071990966797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,fp8,0,0.21129119396209717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,float16,0,0.22541120052337646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,fp8,0,0.21121759414672853
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,4,128,1,fp8,fp8,0,0.21130080223083497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,float16,0,0.23015520572662354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,fp8,0,0.21146240234375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,8,128,1,fp8,fp8,0,0.2116447925567627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,float16,0,0.17476799488067626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,fp8,0,0.15378880500793457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,96,128,1,fp8,fp8,0,0.15402239561080933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,96,1,128,1,fp8,fp8,0,0.40406079292297364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,float16,0,0.12286399602890015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,fp8,0,0.11418399810791016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,1,128,1,fp8,fp8,0,0.21214399337768555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,1,128,1,fp8,fp8,0,0.11473599672317505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,float16,0,0.12166399955749511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,fp8,0,0.11471199989318848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,2,128,1,fp8,fp8,0,0.11489759683609009
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,float16,0,0.12310880422592163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,fp8,0,0.11520800590515137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,4,128,1,fp8,fp8,0,0.11482720375061035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,float16,0,0.1251695990562439
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,fp8,0,0.11501439809799194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,96,8,128,1,fp8,fp8,0,0.11489759683609009
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,float16,0,0.09635999798774719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,fp8,0,0.08626719713211059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,96,128,1,fp8,fp8,0,0.08621439933776856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,fp8,0,0.06623039841651916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,float16,0,0.07071359753608704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,1,128,1,fp8,fp8,0,0.06579999923706055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,fp8,0,0.06577119827270508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,2,128,1,fp8,fp8,0,0.06604959964752197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,float16,0,0.07122560143470764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,fp8,0,0.06579359769821166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,4,128,1,fp8,fp8,0,0.06581599712371826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,float16,0,0.07176799774169922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,fp8,0,0.06579040288925171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,8,128,1,fp8,fp8,0,0.06605439782142639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,96,2,128,1,fp8,fp8,0,0.21129119396209717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,float16,0,0.07003039717674256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,fp8,0,3.4461536407470703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,float16,0,3.6144718170166015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,1,128,1,fp8,fp8,0,3.4423553466796877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,float16,0,3.5529857635498048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,fp8,0,3.4435039520263673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,2,128,1,fp8,fp8,0,3.4698448181152344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,float16,0,3.6532527923583986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,fp8,0,3.486310577392578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,4,128,1,fp8,fp8,0,3.445172882080078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,float16,0,3.7696399688720703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,fp8,0,3.446001434326172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,float16,0,2.8499935150146483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,96,8,128,1,fp8,fp8,0,3.4488288879394533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,fp8,0,2.509939193725586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,96,128,1,fp8,fp8,0,2.517584037780762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,fp8,0,1.729520034790039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,1,128,1,fp8,fp8,0,1.7297552108764649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,float16,0,1.7986415863037108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,fp8,0,1.7290016174316407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,float16,0,1.787513542175293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,2,128,1,fp8,fp8,0,1.7286415100097656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,float16,0,1.8685087203979491
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,fp8,0,1.7301216125488281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,4,128,1,fp8,fp8,0,1.7308464050292969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,float16,0,1.8583776473999023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,fp8,0,1.7886032104492187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,float16,0,1.4275103569030763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,fp8,0,1.265664005279541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,96,128,1,fp8,fp8,0,1.2673888206481934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,float16,0,0.9032655715942383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,fp8,0,0.8732928276062012
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,1,128,1,fp8,fp8,0,0.8743680000305176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,float16,0,0.9047103881835937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,fp8,0,0.8720911979675293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,2,128,1,fp8,fp8,0,0.8741680145263672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,float16,0,0.9180831909179688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,fp8,0,0.873249626159668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,96,8,128,1,fp8,fp8,0,1.7324399948120117
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,4,128,1,fp8,fp8,0,0.8748319625854493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,float16,0,0.9404831886291504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,fp8,0,0.8741600036621093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,96,8,128,1,fp8,fp8,0,0.8738800048828125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,float16,0,0.7269264221191406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,96,128,1,fp8,fp8,0,0.6406960010528564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,fp8,0,0.6472032070159912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,float16,0,0.45977120399475097
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,fp8,0,0.4454495906829834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,1,128,1,fp8,fp8,0,0.44469919204711916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,float16,0,0.4656688213348389
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,fp8,0,0.44436960220336913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,2,128,1,fp8,fp8,0,0.4459519863128662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,float16,0,0.46726560592651367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,4,128,1,fp8,fp8,0,0.4444240093231201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,float16,0,0.4816112041473389
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,fp8,0,0.44494237899780276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,8,128,1,fp8,fp8,0,0.44646081924438474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,float16,0,0.3728928089141846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,fp8,0,0.3296639919281006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,96,128,1,fp8,fp8,0,0.3292288064956665
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,float16,0,0.24148321151733398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,1,128,1,fp8,fp8,0,0.23107678890228273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,float16,0,0.24155840873718262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,fp8,0,0.23088159561157226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,2,128,1,fp8,fp8,0,0.230784010887146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,float16,0,0.24213919639587403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,fp8,0,0.23145439624786376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,4,128,1,fp8,fp8,0,0.23054559230804444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,float16,0,0.25004479885101316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,fp8,0,0.23123199939727784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,8,128,1,fp8,fp8,0,0.2317280054092407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,float16,0,0.1967728018760681
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,fp8,0,0.17450560331344606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,96,128,1,fp8,fp8,0,0.17359520196914674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,fp8,0,0.44490561485290525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,float16,0,0.13037919998168945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,fp8,0,0.123363196849823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,1,128,1,fp8,fp8,0,0.1231152057647705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,float16,0,0.13075519800186158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,fp8,0,0.12350239753723144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,2,128,1,fp8,fp8,0,0.1232800006866455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,float16,0,0.13162239789962768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,fp8,0,0.12313599586486816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,4,128,1,fp8,fp8,0,0.1230623960494995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,float16,0,0.13478879928588866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,fp8,0,0.1239583969116211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,96,8,128,1,fp8,fp8,0,0.12320319414138795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,float16,0,0.11095520257949829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,fp8,0,0.09523839950561523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,96,128,1,fp8,fp8,0,0.09477440118789673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,float16,0,0.07294239997863769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,fp8,0,0.06820160150527954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,1,128,1,fp8,fp8,0,0.067958402633667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,float16,0,0.07376160025596619
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,fp8,0,0.067876797914505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,2,128,1,fp8,fp8,0,0.06806719899177552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,float16,0,0.07398399710655212
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,fp8,0,0.06791359782218934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,4,128,1,fp8,fp8,0,0.06783040165901184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,float16,0,0.07529760003089905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,fp8,0,0.06813920140266419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,96,8,128,1,fp8,fp8,0,0.06826080083847046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,float16,0,0.06353600025177002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,fp8,0,0.054176002740859985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,96,128,1,fp8,fp8,0,0.05346879959106445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,float16,0,0.04527519941329956
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,1,128,1,fp8,fp8,0,0.04175519943237305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,float16,0,0.04526880085468292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,fp8,0,0.04154239892959595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,2,128,1,fp8,fp8,0,0.041764798760414126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,float16,0,0.045256000757217404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,fp8,0,0.0419840008020401
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,4,128,1,fp8,fp8,0,0.042475199699401854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,float16,0,0.045310398936271666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,fp8,0,0.04216319918632507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,8,128,1,fp8,fp8,0,0.042263999581336975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,fp8,0,0.2297055959701538
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,fp8,0,0.04270400106906891
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,1,128,1,float16,float16,0,2.7921056747436523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,1,128,1,float16,fp8,0,2.75229434967041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,1,128,1,fp8,fp8,0,2.749558448791504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,2,128,1,float16,float16,0,2.799920082092285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,2,128,1,float16,fp8,0,2.746444892883301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,2,128,1,fp8,fp8,0,2.7504400253295898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,4,128,1,float16,float16,0,2.835327911376953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,4,128,1,float16,fp8,0,2.749505615234375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,4,128,1,fp8,fp8,0,2.7472208023071287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,8,128,1,float16,fp8,0,2.747875213623047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,8,128,1,float16,float16,0,2.9230432510375977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,96,8,128,1,fp8,fp8,0,2.750118446350098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,96,128,1,float16,float16,0,2.377894401550293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,1,128,1,float16,float16,0,1.4050512313842773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,96,128,1,float16,fp8,0,2.1296287536621095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,96,128,1,fp8,fp8,0,2.1390304565429688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,1,128,1,float16,fp8,0,1.3827424049377441
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,1,128,1,fp8,fp8,0,1.3842320442199707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,2,128,1,float16,float16,0,1.4050304412841796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,2,128,1,float16,fp8,0,1.3828559875488282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,2,128,1,fp8,fp8,0,1.3806079864501952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,4,128,1,float16,float16,0,1.4282192230224608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,4,128,1,float16,fp8,0,1.381601619720459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,4,128,1,fp8,fp8,0,1.3837759971618653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,8,128,1,float16,float16,0,1.4661168098449706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,8,128,1,float16,fp8,0,1.3825519561767579
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,96,128,1,float16,float16,0,1.2005904197692872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,96,8,128,1,fp8,fp8,0,1.3814031600952148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,96,128,1,float16,fp8,0,1.0737551689147948
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,1,128,1,float16,float16,0,0.7122015953063965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,96,128,1,fp8,fp8,0,1.0741151809692382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,1,128,1,float16,fp8,0,0.6988319873809814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,1,128,1,fp8,fp8,0,0.6984687805175781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,2,128,1,float16,float16,0,0.7133024215698243
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,2,128,1,float16,fp8,0,0.6982736110687255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,2,128,1,fp8,fp8,0,0.6978256225585937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,4,128,1,float16,float16,0,0.7213679790496826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,4,128,1,float16,fp8,0,0.6988255977630615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,4,128,1,fp8,fp8,0,0.6985856056213379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,8,128,1,float16,float16,0,0.742193603515625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,8,128,1,float16,fp8,0,0.6980048179626465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,96,8,128,1,fp8,fp8,0,0.698908805847168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,96,128,1,float16,float16,0,0.6082496166229248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,96,128,1,float16,fp8,0,0.5443568229675293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,1,128,1,float16,float16,0,0.36449599266052246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,1,128,1,float16,fp8,0,0.355947208404541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,96,128,1,fp8,fp8,0,0.5443535804748535
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,2,128,1,float16,float16,0,0.3646512031555176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,2,128,1,float16,fp8,0,0.35541601181030275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,2,128,1,fp8,fp8,0,0.3554352045059204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,4,128,1,float16,float16,0,0.3697551965713501
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,4,128,1,float16,fp8,0,0.3552367925643921
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,4,128,1,fp8,fp8,0,0.3562160015106201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,8,128,1,float16,float16,0,0.3800575971603394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,8,128,1,float16,fp8,0,0.3564368009567261
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,8,128,1,fp8,fp8,0,0.3563904047012329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,96,128,1,float16,float16,0,0.3132800102233887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,96,128,1,float16,fp8,0,0.2793312072753906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,96,128,1,fp8,fp8,0,0.27934720516204836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,1,128,1,float16,float16,0,0.19069440364837648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,1,128,1,float16,fp8,0,0.18455679416656495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,1,128,1,fp8,fp8,0,0.18467520475387572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,2,128,1,float16,float16,0,0.19058239459991455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,2,128,1,float16,fp8,0,0.1846400022506714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,2,128,1,fp8,fp8,0,0.1846176028251648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,4,128,1,float16,float16,0,0.19277119636535645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,4,128,1,float16,fp8,0,0.18502559661865234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,4,128,1,fp8,fp8,0,0.18459839820861818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,8,128,1,float16,float16,0,0.1984879970550537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,8,128,1,float16,fp8,0,0.1855039954185486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,96,8,128,1,fp8,fp8,0,0.18472640514373778
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,96,128,1,float16,float16,0,0.16580959558486938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,96,128,1,float16,fp8,0,0.14767839908599853
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,96,128,1,fp8,fp8,0,0.14782880544662474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,1,128,1,float16,float16,0,0.10272639989852905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,1,128,1,fp8,fp8,0,0.09913280010223388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,2,128,1,float16,float16,0,0.10292479991912842
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,2,128,1,float16,fp8,0,0.09865440130233764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,2,128,1,fp8,fp8,0,0.09982399940490723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,4,128,1,float16,float16,0,0.10416959524154663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,4,128,1,float16,fp8,0,0.09958080053329468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,4,128,1,fp8,fp8,0,0.0993391990661621
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,8,128,1,float16,float16,0,0.10746239423751831
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,8,128,1,fp8,fp8,0,0.1005776047706604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,96,128,1,float16,float16,0,0.09233760237693786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,96,128,1,float16,fp8,0,0.08064320087432861
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,96,128,1,fp8,fp8,0,0.08156639933586121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,1,128,1,float16,float16,0,0.058075201511383054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,1,128,1,float16,fp8,0,0.05562400221824646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,1,128,1,fp8,fp8,0,0.055550402402877806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,1,128,1,float16,fp8,0,0.09906399846076966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,96,1,128,1,fp8,fp8,0,0.3565711975097656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,2,128,1,float16,float16,0,0.057599997520446776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,2,128,1,float16,fp8,0,0.05547999739646912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,2,128,1,fp8,fp8,0,0.055516797304153445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,4,128,1,float16,float16,0,0.05926560163497925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,4,128,1,float16,fp8,0,0.055504000186920165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,4,128,1,fp8,fp8,0,0.055587202310562134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,8,128,1,float16,float16,0,0.060476797819137576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,8,128,1,float16,fp8,0,0.055478399991989134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,96,8,128,1,fp8,fp8,0,0.05546560287475586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,96,128,1,float16,float16,0,0.05289120078086853
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,96,128,1,float16,fp8,0,0.04596480131149292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,96,128,1,fp8,fp8,0,0.04549280107021332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,1,128,1,float16,fp8,0,0.03494719862937927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,1,128,1,fp8,fp8,0,0.0349375993013382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,2,128,1,float16,float16,0,0.036585599184036255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,2,128,1,float16,fp8,0,0.03485600054264069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,2,128,1,fp8,fp8,0,0.035006400942802426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,4,128,1,float16,float16,0,0.03704319894313812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,4,128,1,float16,fp8,0,0.03398880064487457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,4,128,1,fp8,fp8,0,0.03400799930095673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,8,128,1,float16,float16,0,0.03718560039997101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,8,128,1,float16,fp8,0,0.034443199634552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,8,128,1,fp8,fp8,0,0.034327998757362366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,96,128,1,float16,float16,0,0.030868801474571227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,96,128,1,float16,fp8,0,0.030856001377105712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,96,128,1,fp8,fp8,0,0.030873599648475646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,1,128,1,float16,float16,0,0.026321598887443544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,1,128,1,float16,fp8,0,0.024825599789619446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,1,128,1,fp8,fp8,0,0.02489120066165924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,2,128,1,float16,float16,0,0.02666560113430023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,2,128,1,float16,fp8,0,0.024736000597476958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,2,128,1,fp8,fp8,0,0.024849599599838255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,96,8,128,1,float16,fp8,0,0.09981279969215393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,4,128,1,float16,float16,0,0.02656640112400055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,4,128,1,fp8,fp8,0,0.024751999974250795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,8,128,1,float16,float16,0,0.026977598667144775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,96,1,128,1,float16,float16,0,0.03511039912700653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,8,128,1,fp8,fp8,0,0.024929599463939668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,1,128,1,float16,float16,0,1.1895440101623536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,1,128,1,float16,fp8,0,1.185579204559326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,1,128,1,fp8,fp8,0,1.1845104217529296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,2,128,1,float16,float16,0,1.1921008110046387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,4,128,1,float16,fp8,0,0.02476480007171631
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,96,8,128,1,float16,fp8,0,0.024833600223064422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,2,128,1,float16,fp8,0,1.1826864242553712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,2,128,1,fp8,fp8,0,1.1839360237121581
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,4,128,1,float16,float16,0,1.207369613647461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,4,128,1,float16,fp8,0,1.1805536270141601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,4,128,1,fp8,fp8,0,1.1815103530883788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,8,128,1,float16,float16,0,1.2483712196350099
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,8,128,1,float16,fp8,0,1.1793248176574707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,96,8,128,1,fp8,fp8,0,1.1798064231872558
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,96,128,1,float16,float16,0,1.0613295555114746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,1,128,1,float16,float16,0,0.6003695964813233
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,96,128,1,float16,fp8,0,0.9639984130859375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,96,128,1,fp8,fp8,0,0.9592432022094727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,1,128,1,float16,fp8,0,0.5984799861907959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,1,128,1,fp8,fp8,0,0.5979375839233398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,2,128,1,float16,float16,0,0.6025055885314942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,2,128,1,float16,fp8,0,0.5967552185058593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,2,128,1,fp8,fp8,0,0.5980207920074463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,4,128,1,float16,float16,0,0.6103024005889892
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,4,128,1,float16,fp8,0,0.5979023933410644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,4,128,1,fp8,fp8,0,0.5962448120117188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,8,128,1,float16,float16,0,0.629527997970581
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,8,128,1,float16,fp8,0,0.5959919929504395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,96,8,128,1,fp8,fp8,0,0.5972799777984619
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,96,128,1,float16,float16,0,0.5381616115570068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,96,128,1,float16,fp8,0,0.4871359825134277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,1,128,1,float16,float16,0,0.30860319137573244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,96,128,1,fp8,fp8,0,0.48749599456787107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,1,128,1,float16,fp8,0,0.3054080009460449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,1,128,1,fp8,fp8,0,0.30534560680389405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,2,128,1,float16,float16,0,0.3084752082824707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,2,128,1,float16,fp8,0,0.3052239894866943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,2,128,1,fp8,fp8,0,0.3052704095840454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,4,128,1,float16,float16,0,0.31152160167694093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,4,128,1,float16,fp8,0,0.3053839921951294
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,4,128,1,fp8,fp8,0,0.3050112009048462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,8,128,1,float16,float16,0,0.3219327926635742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,8,128,1,float16,fp8,0,0.30474560260772704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,96,8,128,1,fp8,fp8,0,0.3053567886352539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,96,128,1,float16,float16,0,0.2785039901733398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,96,128,1,float16,fp8,0,0.2512351989746094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,96,128,1,fp8,fp8,0,0.25106399059295653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,1,128,1,float16,float16,0,0.16244640350341796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,1,128,1,float16,fp8,0,0.15987839698791503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,1,128,1,fp8,fp8,0,0.16002720594406128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,2,128,1,float16,float16,0,0.16194720268249513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,2,128,1,float16,fp8,0,0.16007039546966553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,2,128,1,fp8,fp8,0,0.15992799997329712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,4,128,1,float16,float16,0,0.1640735983848572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,4,128,1,float16,fp8,0,0.15997920036315919
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,4,128,1,fp8,fp8,0,0.16004799604415892
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,8,128,1,float16,float16,0,0.16931519508361817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,8,128,1,float16,fp8,0,0.16006720066070557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,96,8,128,1,fp8,fp8,0,0.15979199409484862
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,96,128,1,float16,float16,0,0.14925919771194457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,1,128,1,float16,float16,0,0.08937439918518067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,1,128,1,float16,fp8,0,0.08770719766616822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,1,128,1,fp8,fp8,0,0.08770719766616822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,2,128,1,float16,float16,0,0.09030719995498657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,2,128,1,float16,fp8,0,0.08797760009765625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,2,128,1,fp8,fp8,0,0.08680959939956664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,4,128,1,float16,float16,0,0.09044319987297059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,4,128,1,float16,fp8,0,0.08659520149230956
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,4,128,1,fp8,fp8,0,0.08670719861984252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,8,128,1,float16,float16,0,0.09238560199737549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,8,128,1,float16,fp8,0,0.08728960156440735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,8,128,1,fp8,fp8,0,0.08733119964599609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,96,128,1,float16,float16,0,0.08432639837265014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,96,128,1,float16,fp8,0,0.07546399831771851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,96,128,1,fp8,fp8,0,0.07481279969215393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,1,128,1,float16,float16,0,0.05142239928245544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,1,128,1,float16,fp8,0,0.04932959973812103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,1,128,1,fp8,fp8,0,0.04937280118465424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,96,128,1,float16,fp8,0,0.1334383964538574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,96,96,128,1,fp8,fp8,0,0.13341439962387086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,2,128,1,float16,float16,0,0.05154719948768616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,2,128,1,float16,fp8,0,0.049374398589134214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,4,128,1,float16,float16,0,0.05299519896507263
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,4,128,1,float16,fp8,0,0.049374398589134214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,4,128,1,fp8,fp8,0,0.04933759868144989
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,8,128,1,float16,fp8,0,0.049395200610160825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,8,128,1,float16,float16,0,0.05345919728279114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,8,128,1,fp8,fp8,0,0.04949280023574829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,96,128,1,float16,float16,0,0.04568960070610047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,96,128,1,float16,fp8,0,0.04111360013484955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,96,128,1,fp8,fp8,0,0.041198399662971494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,1,128,1,float16,float16,0,0.02996479868888855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,1,128,1,float16,fp8,0,0.028971201181411742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,1,128,1,fp8,fp8,0,0.02892799973487854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,2,128,1,float16,float16,0,0.03091680109500885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,2,128,1,float16,fp8,0,0.028968000411987306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,2,128,1,fp8,fp8,0,0.02889440059661865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,4,128,1,float16,float16,0,0.030844798684120177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,4,128,1,float16,fp8,0,0.028857600688934327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,4,128,1,fp8,fp8,0,0.028884801268577575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,8,128,1,float16,float16,0,0.03105599880218506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,8,128,1,float16,fp8,0,0.028860801458358766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,96,8,128,1,fp8,fp8,0,0.028835201263427736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,96,128,1,float16,float16,0,0.026788800954818726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,96,128,1,float16,fp8,0,0.026833599805831908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,96,128,1,fp8,fp8,0,0.02683520019054413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,1,128,1,float16,float16,0,0.02266400009393692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,1,128,1,float16,fp8,0,0.020948800444602966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,1,128,1,fp8,fp8,0,0.02070239931344986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,2,128,1,float16,float16,0,0.022724799811840057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,2,128,1,float16,fp8,0,0.020729599893093108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,4,128,1,float16,float16,0,0.022711999714374542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,4,128,1,fp8,fp8,0,0.020734399557113647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,8,128,1,float16,float16,0,0.02272319942712784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,8,128,1,float16,fp8,0,0.022700800001621245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,8,128,1,fp8,fp8,0,0.022473600506782532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,96,128,1,float16,float16,0,0.0186271995306015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,96,128,1,float16,fp8,0,0.0186271995306015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,96,128,1,fp8,fp8,0,0.01865600049495697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,1,128,1,float16,float16,0,0.01544319987297058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,1,128,1,float16,fp8,0,0.015083199739456177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,96,2,128,1,fp8,fp8,0,0.04951519966125488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,1,128,1,fp8,fp8,0,0.014500799775123595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,2,128,1,float16,float16,0,0.016407999396324157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,2,128,1,float16,fp8,0,0.014459200203418732
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,2,128,1,fp8,fp8,0,0.014641599357128143
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,4,128,1,float16,fp8,0,0.014727999269962311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,4,128,1,fp8,fp8,0,0.014696000516414643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,8,128,1,float16,float16,0,0.016441600024700166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,8,128,1,float16,fp8,0,0.014585599303245544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,8,128,1,fp8,fp8,0,0.014611199498176575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,1,128,1,float16,float16,0,0.706279993057251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,2,128,1,fp8,fp8,0,0.021568000316619873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,1,128,1,float16,fp8,0,0.7093567848205566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,1,128,1,fp8,fp8,0,0.709878396987915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,2,128,1,float16,float16,0,0.7063231945037842
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,96,4,128,1,float16,float16,0,0.014556799829006196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,2,128,1,float16,fp8,0,0.7093152046203614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,2,128,1,fp8,fp8,0,0.7079520225524902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,4,128,1,float16,float16,0,0.7145840167999268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,4,128,1,float16,fp8,0,0.7091248035430908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,4,128,1,fp8,fp8,0,0.7075888156890869
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,8,128,1,float16,float16,0,0.7333968162536622
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,8,128,1,float16,fp8,0,0.7087440013885498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,96,4,128,1,float16,fp8,0,0.020838400721549986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,96,8,128,1,fp8,fp8,0,0.7068895816802978
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,96,128,1,float16,float16,0,0.585203218460083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,96,128,1,float16,fp8,0,0.5414063930511475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,96,128,1,fp8,fp8,0,0.5395679950714112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,1,128,1,float16,float16,0,0.3589776039123535
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,1,128,1,float16,fp8,0,0.3591840028762817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,1,128,1,fp8,fp8,0,0.360097599029541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,2,128,1,float16,float16,0,0.3590912103652954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,2,128,1,float16,fp8,0,0.35955519676208497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,2,128,1,fp8,fp8,0,0.3599584102630615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,4,128,1,float16,float16,0,0.3647216081619263
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,4,128,1,float16,fp8,0,0.3590384006500244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,4,128,1,fp8,fp8,0,0.35881919860839845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,8,128,1,float16,float16,0,0.37245280742645265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,8,128,1,float16,fp8,0,0.36037919521331785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,96,8,128,1,fp8,fp8,0,0.3591439962387085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,96,128,1,float16,float16,0,0.2988912105560303
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,96,128,1,float16,fp8,0,0.276638388633728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,96,128,1,fp8,fp8,0,0.2766815900802612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,1,128,1,float16,float16,0,0.1865664005279541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,1,128,1,float16,fp8,0,0.18618079423904418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,1,128,1,fp8,fp8,0,0.1859392046928406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,2,128,1,float16,float16,0,0.18668320178985595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,2,128,1,float16,fp8,0,0.18652000427246093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,2,128,1,fp8,fp8,0,0.1861296057701111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,4,128,1,float16,float16,0,0.18865760564804077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,4,128,1,float16,fp8,0,0.18661279678344728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,4,128,1,fp8,fp8,0,0.18547680377960205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,8,128,1,float16,float16,0,0.19297120571136475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,8,128,1,float16,fp8,0,0.1854416012763977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,96,8,128,1,fp8,fp8,0,0.18585280179977418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,96,128,1,float16,fp8,0,0.14466079473495483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,96,128,1,fp8,fp8,0,0.1440991997718811
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,1,128,1,float16,fp8,0,0.09855679869651794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,1,128,1,fp8,fp8,0,0.0985759973526001
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,2,128,1,float16,float16,0,0.09935039877891541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,2,128,1,float16,fp8,0,0.09846559762954712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,2,128,1,fp8,fp8,0,0.09860000014305115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,4,128,1,float16,float16,0,0.10076320171356201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,4,128,1,float16,fp8,0,0.09868959784507751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,4,128,1,fp8,fp8,0,0.09888319969177246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,8,128,1,float16,float16,0,0.10272959470748902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,8,128,1,float16,fp8,0,0.09874079823493957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,8,128,1,fp8,fp8,0,0.09870719909667969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,96,128,1,float16,float16,0,0.08557440042495727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,96,128,1,float16,fp8,0,0.07873280048370361
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,96,128,1,fp8,fp8,0,0.07862560153007507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,96,128,1,float16,float16,0,0.15667999982833863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,1,128,1,float16,float16,0,0.055499202013015746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,1,128,1,fp8,fp8,0,0.05343359708786011
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,1,128,1,float16,fp8,0,0.05353119969367981
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,2,128,1,float16,float16,0,0.05547999739646912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,2,128,1,float16,fp8,0,0.05365920066833496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,2,128,1,fp8,fp8,0,0.05345119833946228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,4,128,1,float16,float16,0,0.056276798248291016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,4,128,1,float16,fp8,0,0.05344480276107788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,4,128,1,fp8,fp8,0,0.05352479815483093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,8,128,1,float16,float16,0,0.05758879780769348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,8,128,1,float16,fp8,0,0.05547040104866028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,96,128,1,float16,float16,0,0.049167999625205995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,96,128,1,fp8,fp8,0,0.04525440037250519
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,1,128,1,float16,float16,0,0.03326399922370911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,1,128,1,float16,fp8,0,0.03322719931602478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,1,128,1,fp8,fp8,0,0.03332639932632446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,2,128,1,float16,float16,0,0.03319360017776489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,2,128,1,float16,fp8,0,0.033083200454711914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,2,128,1,fp8,fp8,0,0.033108800649642944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,4,128,1,float16,float16,0,0.03307200074195862
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,4,128,1,float16,fp8,0,0.03314880132675171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,4,128,1,fp8,fp8,0,0.033036801218986514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,8,128,1,float16,float16,0,0.03439359962940216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,8,128,1,float16,fp8,0,0.03303999900817871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,96,1,128,1,float16,float16,0,0.09935039877891541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,8,128,1,fp8,fp8,0,0.033108800649642944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,96,128,1,float16,float16,0,0.026761600375175477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,96,128,1,float16,fp8,0,0.026732799410820008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,96,128,1,fp8,fp8,0,0.026763200759887695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,1,128,1,float16,float16,0,0.02080159932374954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,1,128,1,float16,fp8,0,0.020657600462436677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,2,128,1,float16,float16,0,0.020684799551963805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,1,128,1,fp8,fp8,0,0.02067520022392273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,2,128,1,float16,fp8,0,0.020686399936676026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,2,128,1,fp8,fp8,0,0.020745599269866945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,4,128,1,float16,float16,0,0.020721599459648132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,4,128,1,float16,fp8,0,0.020747199654579163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,4,128,1,fp8,fp8,0,0.02064639925956726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,96,96,128,1,float16,fp8,0,0.04529440104961395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,8,128,1,float16,float16,0,0.02117920070886612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,8,128,1,float16,fp8,0,0.02064319998025894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,96,8,128,1,fp8,fp8,0,0.020635199546813966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,96,128,1,float16,fp8,0,0.018636800348758698
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,96,128,1,fp8,fp8,0,0.018745599687099455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,1,128,1,float16,float16,0,0.016518400609493257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,1,128,1,float16,fp8,0,0.01660960018634796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,1,128,1,fp8,fp8,0,0.014796799421310425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,2,128,1,float16,float16,0,0.01660960018634796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,2,128,1,float16,fp8,0,0.014814400672912597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,2,128,1,fp8,fp8,0,0.01652639955282211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,4,128,1,float16,float16,0,0.016553600132465363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,4,128,1,float16,fp8,0,0.01653439998626709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,4,128,1,fp8,fp8,0,0.014902399480342865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,8,128,1,float16,float16,0,0.01652960032224655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,8,128,1,float16,fp8,0,0.015465599298477174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,8,128,1,fp8,fp8,0,0.0162432000041008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,96,128,1,float16,float16,0,0.014502400159835815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,96,128,1,float16,fp8,0,0.012708799540996551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,96,128,1,fp8,fp8,0,0.012476799637079239
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,1,128,1,float16,float16,0,0.012459199875593185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,1,128,1,fp8,fp8,0,0.012427199631929398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,2,128,1,float16,float16,0,0.012368000298738479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,2,128,1,float16,fp8,0,0.012408000230789185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,2,128,1,fp8,fp8,0,0.012409599870443344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,4,128,1,float16,float16,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,4,128,1,float16,fp8,0,0.012559999525547028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,4,128,1,fp8,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,96,8,128,1,fp8,fp8,0,0.05427359938621521
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,8,128,1,float16,float16,0,0.012399999797344208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,8,128,1,float16,fp8,0,0.012392000108957291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,96,96,128,1,float16,float16,0,0.01929280012845993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,8,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,1,128,1,float16,float16,0,0.5369167804718018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,1,128,1,float16,fp8,0,0.5315631866455078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,96,1,128,1,float16,fp8,0,0.011345600336790084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,2,128,1,float16,float16,0,0.535152006149292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,2,128,1,float16,fp8,0,0.5326096057891846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,2,128,1,fp8,fp8,0,0.5312352180480957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,4,128,1,float16,float16,0,0.5408720016479492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,4,128,1,float16,fp8,0,0.5313231945037842
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,4,128,1,fp8,fp8,0,0.5324528217315674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,8,128,1,float16,float16,0,0.5483888149261474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,8,128,1,float16,fp8,0,0.5314047813415528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,8,128,1,fp8,fp8,0,0.5309631824493408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,96,128,1,float16,float16,0,0.3850464105606079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,96,128,1,float16,fp8,0,0.3601263999938965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,96,128,1,fp8,fp8,0,0.36066560745239257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,1,128,1,float16,float16,0,0.2736272096633911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,1,128,1,float16,fp8,0,0.2710655927658081
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,1,128,1,fp8,fp8,0,0.2705136060714722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,2,128,1,float16,float16,0,0.2743072032928467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,2,128,1,float16,fp8,0,0.2706239938735962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,2,128,1,fp8,fp8,0,0.2708271980285645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,4,128,1,float16,fp8,0,0.27059359550476075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,4,128,1,fp8,fp8,0,0.27071518898010255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,8,128,1,float16,float16,0,0.28059680461883546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,8,128,1,float16,fp8,0,0.2704335927963257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,8,128,1,fp8,fp8,0,0.2703200101852417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,96,128,1,float16,float16,0,0.1981663942337036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,96,128,1,float16,fp8,0,0.1847808003425598
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,96,128,1,fp8,fp8,0,0.18530559539794922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,1,128,1,float16,float16,0,0.1422271966934204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,1,128,1,float16,fp8,0,0.13960800170898438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,1,128,1,fp8,fp8,0,0.139518404006958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,2,128,1,float16,float16,0,0.14292800426483154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,2,128,1,float16,fp8,0,0.13950719833374023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,2,128,1,fp8,fp8,0,0.139684796333313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,4,128,1,float16,float16,0,0.1434880018234253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,4,128,1,float16,fp8,0,0.13946399688720704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,4,128,1,fp8,fp8,0,0.13950400352478026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,96,1,128,1,fp8,fp8,0,0.5321616172790528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,8,128,1,float16,float16,0,0.14566400051116943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,8,128,1,float16,fp8,0,0.13949120044708252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,96,8,128,1,fp8,fp8,0,0.14158400297164916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,96,128,1,float16,fp8,0,0.09850720167160035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,96,128,1,fp8,fp8,0,0.09945120215415955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,1,128,1,float16,float16,0,0.07652000188827515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,1,128,1,float16,fp8,0,0.0739791989326477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,1,128,1,fp8,fp8,0,0.07393919825553893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,2,128,1,float16,float16,0,0.07802240252494812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,2,128,1,float16,fp8,0,0.07404159903526306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,2,128,1,fp8,fp8,0,0.07397440075874329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,4,128,1,float16,float16,0,0.07800959944725036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,96,4,128,1,float16,float16,0,0.27671198844909667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,4,128,1,fp8,fp8,0,0.07389280200004578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,8,128,1,float16,float16,0,0.07831839919090271
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,8,128,1,float16,fp8,0,0.07403200268745422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,8,128,1,fp8,fp8,0,0.0740015983581543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,96,128,1,float16,float16,0,0.0578000009059906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,96,128,1,float16,fp8,0,0.053540802001953124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,96,128,1,fp8,fp8,0,0.05341280102729797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,1,128,1,float16,float16,0,0.043196800351142886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,1,128,1,float16,fp8,0,0.041126400232315063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,1,128,1,fp8,fp8,0,0.041264000535011294
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,2,128,1,float16,float16,0,0.043140798807144165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,2,128,1,float16,fp8,0,0.04127199947834015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,2,128,1,fp8,fp8,0,0.04184960126876831
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,4,128,1,float16,float16,0,0.043243199586868286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,4,128,1,float16,fp8,0,0.041252800822258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,4,128,1,fp8,fp8,0,0.041233599185943604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,8,128,1,float16,fp8,0,0.04142560064792633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,8,128,1,fp8,fp8,0,0.04147680103778839
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,96,128,1,float16,float16,0,0.031046399474143983
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,96,128,1,float16,fp8,0,0.0310479998588562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,96,128,1,fp8,fp8,0,0.0311055988073349
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,1,128,1,float16,float16,0,0.026748800277709962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,96,128,1,float16,float16,0,0.10503200292587281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,1,128,1,float16,fp8,0,0.025041601061820982
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,1,128,1,fp8,fp8,0,0.024831999838352204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,2,128,1,float16,fp8,0,0.02476319968700409
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,2,128,1,fp8,fp8,0,0.024825599789619446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,4,128,1,float16,float16,0,0.026875200867652892
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,4,128,1,float16,fp8,0,0.025755199790000915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,4,128,1,fp8,fp8,0,0.02592160105705261
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,96,4,128,1,float16,fp8,0,0.07522879838943482
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,8,128,1,float16,float16,0,0.02677919864654541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,8,128,1,float16,fp8,0,0.026740801334381104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,8,128,1,fp8,fp8,0,0.025387200713157653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,96,128,1,float16,float16,0,0.020608000457286835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,96,128,1,float16,fp8,0,0.019508799910545348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,96,128,1,fp8,fp8,0,0.020609599351882935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,1,128,1,float16,float16,0,0.01664319932460785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,1,128,1,float16,fp8,0,0.01671839952468872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,1,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,2,128,1,float16,float16,0,0.01863040030002594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,2,128,1,float16,fp8,0,0.016515199840068818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,2,128,1,fp8,fp8,0,0.016710400581359863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,4,128,1,float16,float16,0,0.017561599612236023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,4,128,1,float16,fp8,0,0.016648000478744505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,4,128,1,fp8,fp8,0,0.01655679941177368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,8,128,1,float16,float16,0,0.018212799727916718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,8,128,1,float16,fp8,0,0.01655520051717758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,96,8,128,1,float16,float16,0,0.04320479929447174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,96,8,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,96,128,1,float16,float16,0,0.016574400663375854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,96,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,96,128,1,fp8,fp8,0,0.014535999298095703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,1,128,1,float16,float16,0,0.014510400593280792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,1,128,1,float16,fp8,0,0.012780800461769104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,1,128,1,fp8,fp8,0,0.012574400007724761
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,2,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,2,128,1,float16,fp8,0,0.01260959953069687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,2,128,1,fp8,fp8,0,0.01308639943599701
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,4,128,1,float16,fp8,0,0.013385599851608277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,4,128,1,float16,float16,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,4,128,1,fp8,fp8,0,0.013166399300098419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,8,128,1,float16,float16,0,0.014452800154685974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,8,128,1,float16,fp8,0,0.012918399274349212
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,96,8,128,1,fp8,fp8,0,0.014457599818706512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,96,128,1,float16,float16,0,0.012700800597667695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,96,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,96,128,1,fp8,fp8,0,0.01061440035700798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,96,2,128,1,float16,float16,0,0.026921600103378296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,1,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,1,128,1,fp8,fp8,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,2,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,2,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,2,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,4,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,4,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,4,128,1,fp8,fp8,0,0.010609599947929382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,8,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,8,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,1,128,1,float16,float16,0,0.45555682182312013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,1,128,1,float16,fp8,0,0.44658241271972654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,1,128,1,fp8,fp8,0,0.44646401405334474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,2,128,1,float16,float16,0,0.45512800216674804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,1,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,2,128,1,float16,fp8,0,0.4463935852050781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,2,128,1,fp8,fp8,0,0.4475711822509766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,4,128,1,float16,float16,0,0.45707039833068847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,96,8,128,1,float16,fp8,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,4,128,1,float16,fp8,0,0.44681758880615235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,4,128,1,fp8,fp8,0,0.446670389175415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,8,128,1,float16,fp8,0,0.44720959663391113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,8,128,1,float16,float16,0,0.4609407901763916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,96,128,1,float16,float16,0,0.2869312047958374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,96,8,128,1,fp8,fp8,0,0.4466720104217529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,96,128,1,float16,fp8,0,0.27463200092315676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,96,128,1,fp8,fp8,0,0.27394559383392336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,1,128,1,float16,fp8,0,0.22786400318145753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,1,128,1,fp8,fp8,0,0.22928159236907958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,2,128,1,float16,float16,0,0.23394238948822021
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,2,128,1,float16,fp8,0,0.2279599905014038
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,2,128,1,fp8,fp8,0,0.2289664030075073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,4,128,1,float16,float16,0,0.23480160236358644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,4,128,1,float16,fp8,0,0.2275439977645874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,4,128,1,fp8,fp8,0,0.22952799797058104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,8,128,1,float16,float16,0,0.23698720932006836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,96,128,1,float16,float16,0,0.14982399940490723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,96,128,1,float16,fp8,0,0.14205600023269654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,96,128,1,fp8,fp8,0,0.14180320501327515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,1,128,1,float16,float16,0,0.12176480293273925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,1,128,1,float16,fp8,0,0.11743839979171752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,1,128,1,fp8,fp8,0,0.11723359823226928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,2,128,1,float16,float16,0,0.12290400266647339
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,2,128,1,float16,fp8,0,0.11727839708328247
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,2,128,1,fp8,fp8,0,0.11720000505447388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,4,128,1,float16,float16,0,0.12314399480819702
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,1,128,1,float16,float16,0,0.23349919319152831
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,4,128,1,float16,fp8,0,0.11862239837646485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,8,128,1,float16,float16,0,0.12357280254364014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,8,128,1,float16,fp8,0,0.11891520023345947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,8,128,1,fp8,fp8,0,0.11886719465255738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,8,128,1,float16,fp8,0,0.22778239250183105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,96,128,1,float16,float16,0,0.07982239723205567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,96,128,1,float16,fp8,0,0.07453920245170594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,96,128,1,fp8,fp8,0,0.07594239711761475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,1,128,1,float16,float16,0,0.06568959951400757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,1,128,1,float16,fp8,0,0.0635968029499054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,1,128,1,fp8,fp8,0,0.06363840103149414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,2,128,1,float16,float16,0,0.06575520038604736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,2,128,1,float16,fp8,0,0.06368160247802734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,2,128,1,fp8,fp8,0,0.06385440230369568
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,4,128,1,float16,float16,0,0.06573600172996522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,4,128,1,float16,fp8,0,0.06374239921569824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,8,128,1,float16,float16,0,0.06583840250968934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,8,128,1,float16,fp8,0,0.06372960209846497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,8,128,1,fp8,fp8,0,0.06374239921569824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,96,128,1,float16,float16,0,0.04476479887962341
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,96,128,1,float16,fp8,0,0.04129120111465454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,96,128,1,fp8,fp8,0,0.041233599185943604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,1,128,1,float16,float16,0,0.037176001071929934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,1,128,1,float16,fp8,0,0.03637759983539581
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,1,128,1,fp8,fp8,0,0.03695839941501618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,96,4,128,1,fp8,fp8,0,0.11715199947357177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,2,128,1,float16,float16,0,0.037150400876998904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,2,128,1,float16,fp8,0,0.03683040142059326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,2,128,1,fp8,fp8,0,0.03678080141544342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,4,128,1,float16,float16,0,0.03706879913806915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,4,128,1,float16,fp8,0,0.03501439988613129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,4,128,1,fp8,fp8,0,0.03514240086078644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,8,128,1,float16,float16,0,0.03710240125656128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,96,8,128,1,fp8,fp8,0,0.22897601127624512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,8,128,1,fp8,fp8,0,0.037115201354026794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,96,128,1,float16,float16,0,0.026745599508285523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,96,128,1,float16,fp8,0,0.025054401159286498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,96,128,1,fp8,fp8,0,0.026771199703216553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,96,4,128,1,fp8,fp8,0,0.06377599835395813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,1,128,1,float16,fp8,0,0.02274879962205887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,1,128,1,fp8,fp8,0,0.02274879962205887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,2,128,1,float16,float16,0,0.024238400161266327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,2,128,1,float16,fp8,0,0.02277279943227768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,2,128,1,fp8,fp8,0,0.02284799963235855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,4,128,1,float16,float16,0,0.022729599475860597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,4,128,1,float16,fp8,0,0.023081600666046143
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,4,128,1,fp8,fp8,0,0.022694399952888487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,8,128,1,float16,float16,0,0.022761599719524385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,8,128,1,float16,fp8,0,0.022703999280929567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,8,128,1,fp8,fp8,0,0.022675199806690215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,96,128,1,float16,float16,0,0.018585599958896637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,96,128,1,float16,fp8,0,0.016545599699020384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,96,128,1,fp8,fp8,0,0.016631999611854555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,1,128,1,float16,float16,0,0.016595199704170227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,1,128,1,float16,fp8,0,0.014510400593280792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,1,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,2,128,1,float16,float16,0,0.0165583997964859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,2,128,1,float16,fp8,0,0.014588800072669984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,96,8,128,1,float16,fp8,0,0.03508960008621216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,2,128,1,fp8,fp8,0,0.014574399590492249
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,4,128,1,float16,float16,0,0.016487999260425566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,4,128,1,float16,fp8,0,0.015542399883270264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,4,128,1,fp8,fp8,0,0.015219199657440185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,8,128,1,float16,float16,0,0.01656640022993088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,8,128,1,float16,fp8,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,96,8,128,1,fp8,fp8,0,0.015132799744606018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,96,128,1,float16,float16,0,0.014591999351978302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,96,128,1,float16,fp8,0,0.012572799623012543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,96,128,1,fp8,fp8,0,0.012462399899959564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,96,1,128,1,float16,float16,0,0.02481440007686615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,1,128,1,float16,fp8,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,1,128,1,fp8,fp8,0,0.012579199671745301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,2,128,1,float16,float16,0,0.012603199481964112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,2,128,1,float16,fp8,0,0.012593600153923034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,2,128,1,fp8,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,4,128,1,float16,float16,0,0.012544000148773193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,4,128,1,float16,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,4,128,1,fp8,fp8,0,0.012569600343704223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,8,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,8,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,8,128,1,fp8,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,96,128,1,float16,float16,0,0.01255359947681427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,96,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,96,128,1,fp8,fp8,0,0.010635200142860412
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,1,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,1,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,2,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,2,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,4,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,2,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,4,128,1,float16,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,4,128,1,fp8,fp8,0,0.010599999874830245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,8,128,1,float16,float16,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,8,128,1,fp8,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,8,128,1,float16,fp8,0,0.010552000254392624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,1,128,1,float16,float16,0,0.43323359489440916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,1,128,1,float16,fp8,0,0.40803680419921873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,1,128,1,fp8,fp8,0,0.4083424091339111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,2,128,1,float16,float16,0,0.4334991931915283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,96,1,128,1,float16,float16,0,0.012449599802494049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,2,128,1,float16,fp8,0,0.4082304000854492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,2,128,1,fp8,fp8,0,0.40822720527648926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,4,128,1,float16,float16,0,0.4367504119873047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,4,128,1,float16,fp8,0,0.4080383777618408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,4,128,1,fp8,fp8,0,0.4080944061279297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,96,1,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,8,128,1,float16,fp8,0,0.40791997909545896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,96,128,1,float16,float16,0,0.24912478923797607
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,8,128,1,fp8,fp8,0,0.4080207824707031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,96,128,1,float16,fp8,0,0.2316943883895874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,96,128,1,fp8,fp8,0,0.23172481060028077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,1,128,1,float16,float16,0,0.22231199741363525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,1,128,1,float16,fp8,0,0.2087536096572876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,1,128,1,fp8,fp8,0,0.20827999114990234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,2,128,1,float16,fp8,0,0.20839519500732423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,2,128,1,fp8,fp8,0,0.20836000442504882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,4,128,1,float16,float16,0,0.2233328104019165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,4,128,1,float16,fp8,0,0.20758559703826904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,4,128,1,fp8,fp8,0,0.20908000469207763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,8,128,1,float16,float16,0,0.22363998889923095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,8,128,1,float16,fp8,0,0.20873761177062988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,8,128,1,fp8,fp8,0,0.20733439922332764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,96,128,1,float16,float16,0,0.12948640584945678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,96,128,1,float16,fp8,0,0.12098560333251954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,96,128,1,fp8,fp8,0,0.12090400457382203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,1,128,1,float16,float16,0,0.11486400365829467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,1,128,1,float16,fp8,0,0.10880800485610961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,1,128,1,fp8,fp8,0,0.10870560407638549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,2,128,1,float16,float16,0,0.11492320299148559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,2,128,1,float16,fp8,0,0.10873759984970092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,96,8,128,1,float16,float16,0,0.43888001441955565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,2,128,1,fp8,fp8,0,0.10867840051651001
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,4,128,1,float16,float16,0,0.11681439876556396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,4,128,1,float16,fp8,0,0.10883200168609619
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,4,128,1,fp8,fp8,0,0.10868959426879883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,8,128,1,float16,float16,0,0.1149664044380188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,8,128,1,float16,fp8,0,0.10866719484329224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,96,8,128,1,fp8,fp8,0,0.10871360301971436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,96,128,1,float16,float16,0,0.06986399888992309
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,96,128,1,float16,fp8,0,0.06373440027236939
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,96,128,1,fp8,fp8,0,0.06367200016975402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,1,128,1,float16,float16,0,0.06176000237464905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,96,2,128,1,float16,float16,0,0.22338240146636962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,1,128,1,float16,fp8,0,0.0575984001159668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,1,128,1,fp8,fp8,0,0.05802559852600098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,2,128,1,float16,float16,0,0.06349120140075684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,2,128,1,fp8,fp8,0,0.05757279992103577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,4,128,1,float16,float16,0,0.06334879994392395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,4,128,1,float16,fp8,0,0.05762400031089783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,4,128,1,fp8,fp8,0,0.05831360220909119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,8,128,1,float16,float16,0,0.06166239976882935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,8,128,1,float16,fp8,0,0.05804799795150757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,8,128,1,fp8,fp8,0,0.057548797130584715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,96,128,1,float16,float16,0,0.03912000060081482
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,96,128,1,float16,fp8,0,0.037036800384521486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,96,128,1,fp8,fp8,0,0.03713279962539673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,1,128,1,float16,float16,0,0.03499679863452911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,1,128,1,float16,fp8,0,0.03505760133266449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,2,128,1,float16,float16,0,0.03537760078907013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,2,128,1,float16,fp8,0,0.03500480055809021
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,2,128,1,fp8,fp8,0,0.034974399209022525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,4,128,1,float16,float16,0,0.03511680066585541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,4,128,1,float16,fp8,0,0.0346560001373291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,4,128,1,fp8,fp8,0,0.03415519893169403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,8,128,1,float16,float16,0,0.03513599932193756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,8,128,1,float16,fp8,0,0.03412159979343414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,8,128,1,fp8,fp8,0,0.03399200141429901
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,96,128,1,float16,float16,0,0.024931199848651886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,96,128,1,float16,fp8,0,0.02279040068387985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,96,128,1,fp8,fp8,0,0.02288320064544678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,1,128,1,float16,float16,0,0.02285760045051575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,96,2,128,1,float16,fp8,0,0.05773119926452637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,1,128,1,float16,fp8,0,0.022609600424766542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,1,128,1,fp8,fp8,0,0.020678399503231047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,2,128,1,float16,fp8,0,0.02072480022907257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,2,128,1,fp8,fp8,0,0.022510400414466857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,4,128,1,float16,float16,0,0.02268799990415573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,4,128,1,float16,fp8,0,0.02237759977579117
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,4,128,1,fp8,fp8,0,0.022908799350261688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,8,128,1,float16,float16,0,0.02266560047864914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,8,128,1,fp8,fp8,0,0.02160319983959198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,8,128,1,float16,fp8,0,0.022838400304317476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,96,1,128,1,fp8,fp8,0,0.03306879997253418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,96,128,1,float16,fp8,0,0.016476799547672272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,96,128,1,fp8,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,1,128,1,float16,float16,0,0.01640319973230362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,1,128,1,float16,fp8,0,0.014534400403499603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,2,128,1,float16,float16,0,0.01576319932937622
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,1,128,1,fp8,fp8,0,0.014711999893188476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,2,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,2,128,1,fp8,fp8,0,0.01478559970855713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,4,128,1,float16,float16,0,0.015422399342060088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,4,128,1,float16,fp8,0,0.014788800477981567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,4,128,1,fp8,fp8,0,0.01451680064201355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,8,128,1,float16,fp8,0,0.014528000354766845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,8,128,1,fp8,fp8,0,0.014510400593280792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,96,128,1,float16,float16,0,0.014457599818706512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,96,128,1,float16,fp8,0,0.012465599924325943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,96,128,1,fp8,fp8,0,0.012459199875593185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,1,128,1,float16,float16,0,0.012438400089740754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,1,128,1,float16,fp8,0,0.012443199753761292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,1,128,1,fp8,fp8,0,0.012444800138473511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,2,128,1,float16,float16,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,2,128,1,float16,fp8,0,0.012408000230789185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,2,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,4,128,1,float16,float16,0,0.01244639977812767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,4,128,1,float16,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,96,2,128,1,float16,float16,0,0.02287999987602234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,4,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,8,128,1,float16,float16,0,0.012559999525547028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,8,128,1,float16,fp8,0,0.012415999919176102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,96,8,128,1,fp8,fp8,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,96,128,1,float16,float16,0,0.012435200065374375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,96,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,1,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,1,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,1,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,2,128,1,float16,fp8,0,0.010351999849081039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,2,128,1,fp8,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,4,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,4,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,4,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,8,128,1,float16,float16,0,0.01666879951953888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,8,128,1,float16,fp8,0,0.010595200210809707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,8,128,1,fp8,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,1,128,1,float16,float16,0,0.4307424068450928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,96,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,96,96,128,1,float16,float16,0,0.01857759952545166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,1,128,1,float16,fp8,0,0.39950559139251707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,1,128,1,fp8,fp8,0,0.39876160621643064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,2,128,1,float16,float16,0,0.4286367893218994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,96,8,128,1,float16,float16,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,2,128,1,float16,fp8,0,0.39914400577545167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,2,128,1,fp8,fp8,0,0.39923360347747805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,4,128,1,float16,fp8,0,0.39857919216156007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,4,128,1,fp8,fp8,0,0.39906721115112304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,0,0.3990015983581543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,8,128,1,fp8,fp8,0,0.3991039991378784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,96,128,1,float16,float16,0,0.22149760723114015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,96,128,1,float16,fp8,0,0.20538558959960937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,96,128,1,fp8,fp8,0,0.20515360832214355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,1,128,1,float16,float16,0,0.21930398941040039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,1,128,1,float16,fp8,0,0.2043071985244751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,1,128,1,fp8,fp8,0,0.20494558811187744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,2,128,1,float16,float16,0,0.21929121017456055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,2,128,1,float16,fp8,0,0.2047408103942871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,2,128,1,fp8,fp8,0,0.20432000160217284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,4,128,1,float16,float16,0,0.21980960369110109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,4,128,1,float16,fp8,0,0.20499041080474853
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,4,128,1,fp8,fp8,0,0.20352160930633545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,0,0.21943199634552002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,0,0.43309597969055175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,0,0.20422399044036865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,96,8,128,1,fp8,fp8,0,0.20476000308990477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,96,128,1,float16,float16,0,0.11692639589309692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,96,128,1,float16,fp8,0,0.106769597530365
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,1,128,1,float16,float16,0,0.11495360136032104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,1,128,1,float16,fp8,0,0.10669120550155639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,1,128,1,fp8,fp8,0,0.10679039955139161
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,2,128,1,float16,float16,0,0.11490880250930786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,2,128,1,float16,fp8,0,0.106659197807312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,2,128,1,fp8,fp8,0,0.10681439638137817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,4,128,1,float16,float16,0,0.11525919437408447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,4,128,1,float16,fp8,0,0.10680639743804932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,4,128,1,fp8,fp8,0,0.10665600299835205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,0,0.11506880521774292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,0,0.10666719675064087
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,96,128,1,float16,float16,0,0.0637776017189026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,8,128,1,fp8,fp8,0,0.10669280290603637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,96,128,1,float16,fp8,0,0.057543998956680296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,96,128,1,fp8,fp8,0,0.05857920050621033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,1,128,1,float16,float16,0,0.06160640120506287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,1,128,1,float16,fp8,0,0.05761600136756897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,1,128,1,fp8,fp8,0,0.05755360126495361
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,2,128,1,float16,float16,0,0.06236479878425598
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,2,128,1,float16,fp8,0,0.057520002126693726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,2,128,1,fp8,fp8,0,0.057651197910308837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,4,128,1,float16,float16,0,0.061659198999404904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,4,128,1,float16,fp8,0,0.05766879916191101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,4,128,1,fp8,fp8,0,0.05754079818725586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,0,0.0624064028263092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,0,0.05767520070075989
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,96,8,128,1,fp8,fp8,0,0.058195197582244874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,96,128,1,float16,float16,0,0.03707199990749359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,96,128,1,float16,fp8,0,0.03368319869041443
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,96,128,1,fp8,fp8,0,0.033062401413917544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,1,128,1,float16,float16,0,0.035071998834609985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,1,128,1,float16,fp8,0,0.03294720053672791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,1,128,1,fp8,fp8,0,0.03300159871578216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,2,128,1,float16,fp8,0,0.03427999913692474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,2,128,1,fp8,fp8,0,0.03424960076808929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,96,96,128,1,fp8,fp8,0,0.10678240060806274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,4,128,1,float16,float16,0,0.035120001435279845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,4,128,1,float16,fp8,0,0.03300319910049439
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,4,128,1,fp8,fp8,0,0.034350401163101195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,0,0.035062399506568906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,0,0.033036801218986514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,8,128,1,fp8,fp8,0,0.03297280073165894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,96,128,1,float16,float16,0,0.024774399399757386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,96,128,1,float16,fp8,0,0.02067199945449829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,96,128,1,fp8,fp8,0,0.021279999613761903
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,1,128,1,float16,float16,0,0.022705599665641785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,1,128,1,float16,fp8,0,0.02075680047273636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,1,128,1,fp8,fp8,0,0.020745599269866945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,2,128,1,float16,float16,0,0.02273920029401779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,2,128,1,float16,fp8,0,0.020715199410915375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,2,128,1,fp8,fp8,0,0.02080159932374954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,4,128,1,float16,float16,0,0.0227183997631073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,4,128,1,float16,fp8,0,0.02078399956226349
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,4,128,1,fp8,fp8,0,0.02064639925956726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,0,0.02272319942712784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,0,0.020788800716400147
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,96,8,128,1,fp8,fp8,0,0.02252320051193237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,96,128,1,float16,fp8,0,0.014660799503326416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,96,128,1,fp8,fp8,0,0.014628799259662628
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,1,128,1,float16,float16,0,0.016382400691509248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,1,128,1,float16,fp8,0,0.01462399959564209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,1,128,1,fp8,fp8,0,0.01464959979057312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,2,128,1,float16,float16,0,0.014616000652313232
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,2,128,1,float16,fp8,0,0.014606399834156037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,96,4,128,1,float16,float16,0,0.4300032138824463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,2,128,1,fp8,fp8,0,0.014609600603580474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,4,128,1,float16,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,4,128,1,fp8,fp8,0,0.014635199308395385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,96,2,128,1,float16,float16,0,0.03510079979896545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,0,0.01653439998626709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,8,128,1,fp8,fp8,0,0.014521600306034088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,96,128,1,float16,float16,0,0.014519999921321868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,96,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,96,128,1,fp8,fp8,0,0.01242400035262108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,1,128,1,float16,float16,0,0.01242400035262108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,1,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,1,128,1,fp8,fp8,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,2,128,1,float16,float16,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,2,128,1,float16,fp8,0,0.012427199631929398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,2,128,1,fp8,fp8,0,0.012468799948692322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,4,128,1,float16,float16,0,0.012503999471664428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,4,128,1,float16,fp8,0,0.012467200309038163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,4,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,96,128,1,float16,float16,0,0.0176816001534462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,96,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,96,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,96,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,1,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,1,128,1,float16,fp8,0,0.009033600240945816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,1,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,2,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,2,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,2,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,4,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,4,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,4,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,0,0.010132800042629241
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,96,8,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,96,8,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,96,4,128,1,float16,float16,0,0.014860799908638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,fp8,0,15.451641845703126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,1,128,1,fp8,fp8,0,15.683523559570313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,float16,0,20.716232299804688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,float16,0,20.73315887451172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,fp8,0,15.83565673828125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,2,128,1,fp8,fp8,0,15.89892120361328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,fp8,0,15.783059692382812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,float16,0,21.77955017089844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,4,128,1,fp8,fp8,0,15.773829650878906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,fp8,0,15.781887817382813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,float16,0,22.443348693847657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,64,8,128,1,fp8,fp8,0,15.576329040527344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,fp8,0,8.202305603027344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,64,128,1,fp8,fp8,0,8.326201629638671
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,float16,0,10.688247680664062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,fp8,0,7.737416076660156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,1,128,1,fp8,fp8,0,7.834404754638672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,float16,0,10.754615783691406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,fp8,0,7.804169464111328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,2,128,1,fp8,fp8,0,7.9494270324707035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,float16,0,10.673242950439453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,fp8,0,8.004190063476562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,4,128,1,fp8,fp8,0,8.126602935791016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,fp8,0,7.864295959472656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,8,128,1,fp8,fp8,0,8.02886734008789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,float16,0,10.8391357421875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,fp8,0,4.244539260864258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,64,128,1,fp8,fp8,0,4.196940612792969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,fp8,0,3.8723072052001952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,float16,0,5.400662231445312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,1,128,1,fp8,fp8,0,3.877764892578125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,float16,0,5.489622497558594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,fp8,0,3.9108047485351562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,2,128,1,fp8,fp8,0,4.057022476196289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,float16,0,5.2716625213623045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,fp8,0,4.009552001953125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,4,128,1,fp8,fp8,0,4.203758239746094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,float16,0,5.484358215332032
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,fp8,0,3.869790267944336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,8,128,1,fp8,fp8,0,4.222753524780273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,fp8,0,2.1104047775268553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,64,128,1,fp8,fp8,0,2.1200992584228517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,float16,0,2.5215055465698244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,fp8,0,2.266110420227051
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,1,128,1,fp8,fp8,0,2.007904052734375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,float16,0,2.522412872314453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,fp8,0,2.3379135131835938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,2,128,1,fp8,fp8,0,1.9811456680297852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,float16,0,2.562494468688965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,fp8,0,2.150284767150879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,4,128,1,fp8,fp8,0,1.9563167572021485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,float16,0,2.5158239364624024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,fp8,0,2.227681541442871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,8,128,1,fp8,fp8,0,2.0001312255859376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,float16,0,12.68663330078125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,fp8,0,9.139393615722657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,1,128,1,fp8,fp8,0,9.233715057373047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,float16,0,12.890676879882813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,fp8,0,9.132736206054688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,2,128,1,fp8,fp8,0,9.146321868896484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,float16,0,12.8655029296875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,float16,0,2.562059211730957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,float16,0,5.210233688354492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,fp8,0,9.31648941040039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,float16,0,11.007504272460938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,4,128,1,fp8,fp8,0,8.942302703857422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,float16,0,12.032923126220703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,fp8,0,9.142726135253906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,64,8,128,1,fp8,fp8,0,8.946852874755859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,fp8,0,4.899460983276367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,64,128,1,fp8,fp8,0,4.874862289428711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,float16,0,6.365372848510742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,float16,0,5.63671989440918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,fp8,0,4.630017471313477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,1,128,1,fp8,fp8,0,4.524100875854492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,fp8,0,4.507648086547851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,float16,0,5.917919921875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,2,128,1,fp8,fp8,0,4.62567367553711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,float16,0,6.11591682434082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,fp8,0,4.500872039794922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,4,128,1,fp8,fp8,0,4.514547348022461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,float16,0,3.06713752746582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,float16,0,6.261447906494141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,8,128,1,fp8,fp8,0,4.484638214111328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,fp8,0,4.57337760925293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,fp8,0,2.573320007324219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,fp8,0,2.2860591888427733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,64,128,1,fp8,fp8,0,2.7767263412475587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,float16,0,2.7077152252197267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,1,128,1,fp8,fp8,0,2.2391935348510743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,fp8,0,2.2555519104003907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,float16,0,2.828296089172363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,2,128,1,fp8,fp8,0,2.8958127975463865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,float16,0,2.7763248443603517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,fp8,0,2.291472053527832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,4,128,1,fp8,fp8,0,2.682620811462402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,float16,0,2.8123231887817384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,8,128,1,fp8,fp8,0,2.313523292541504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,fp8,0,2.573716735839844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,fp8,0,1.4808927536010743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,64,128,1,fp8,fp8,0,1.298896026611328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,float16,0,1.3895359992980958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,fp8,0,1.1502960205078125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,1,128,1,fp8,fp8,0,1.1526687622070313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,fp8,0,1.1474111557006836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,float16,0,1.4165583610534669
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,float16,0,1.569320011138916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,2,128,1,fp8,fp8,0,1.168342399597168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,float16,0,1.3927103996276855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,fp8,0,1.2720720291137695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,4,128,1,fp8,fp8,0,1.148423957824707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,float16,0,1.4588959693908692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,fp8,0,1.1613696098327637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,64,8,128,1,fp8,fp8,0,1.1628432273864746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,1,128,1,fp8,fp8,0,6.38690071105957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,fp8,0,6.440054321289063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,float16,0,8.389566040039062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,float16,0,8.609159851074219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,fp8,0,6.37695198059082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,2,128,1,fp8,fp8,0,6.562446594238281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,fp8,0,6.348283386230468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,float16,0,8.514379119873047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,4,128,1,fp8,fp8,0,6.474947357177735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,fp8,0,6.4424560546875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,float16,0,9.07062225341797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,64,8,128,1,fp8,fp8,0,6.520059204101562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,float16,0,4.59703369140625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,fp8,0,3.501443099975586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,64,128,1,fp8,fp8,0,3.682012939453125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,float16,0,4.075531387329102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,fp8,0,3.4374561309814453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,1,128,1,fp8,fp8,0,3.1927600860595704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,float16,0,4.295576095581055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,fp8,0,3.4695056915283202
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,2,128,1,fp8,fp8,0,3.1886016845703127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,float16,0,4.154308700561524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,fp8,0,3.397412872314453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,4,128,1,fp8,fp8,0,3.1846927642822265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,float16,0,4.0398719787597654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,float16,0,2.2250368118286135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,fp8,0,3.5826446533203127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,64,8,128,1,fp8,fp8,0,3.2490512847900392
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,64,128,1,fp8,fp8,0,1.9107776641845704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,float16,0,1.9665519714355468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,fp8,0,1.738145637512207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,1,128,1,fp8,fp8,0,1.6063936233520508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,fp8,0,1.6205615997314453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,float16,0,2.1540464401245116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,fp8,0,2.036396789550781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,2,128,1,fp8,fp8,0,1.6140192031860352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,4,128,1,fp8,fp8,0,1.5977408409118652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,fp8,0,1.806991958618164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,float16,0,1.9836000442504882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,fp8,0,1.6201360702514649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,8,128,1,fp8,fp8,0,1.6090944290161133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,float16,0,1.24487361907959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,fp8,0,0.9410927772521973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,64,128,1,fp8,fp8,0,0.9189647674560547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,fp8,0,0.8397680282592773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,float16,0,1.078384017944336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,1,128,1,fp8,fp8,0,0.8277600288391114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,float16,0,2.0464431762695314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,float16,0,0.984825611114502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,2,128,1,fp8,fp8,0,0.8289471626281738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,float16,0,1.034280014038086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,fp8,0,0.8394528388977051
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,4,128,1,fp8,fp8,0,0.8285951614379883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,float16,0,1.06669282913208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,fp8,0,0.8295295715332032
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,8,128,1,fp8,fp8,0,0.9100255966186523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,fp8,0,0.9079296112060546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,fp8,0,8.506708526611328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,1,128,1,fp8,fp8,0,8.381208038330078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,float16,0,11.080345916748048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,float16,0,10.92553939819336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,fp8,0,8.511246490478516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,2,128,1,fp8,fp8,0,8.456324768066406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,float16,0,11.584156799316407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,fp8,0,8.362433624267577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,4,128,1,fp8,fp8,0,8.7447998046875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,float16,0,11.649059295654297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,fp8,0,8.400736236572266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,64,8,128,1,fp8,fp8,0,8.584862518310548
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,float16,0,6.3034912109375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,fp8,0,4.910422515869141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,64,128,1,fp8,fp8,0,4.675316619873047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,fp8,0,4.200161743164062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,float16,0,5.454643249511719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,1,128,1,fp8,fp8,0,4.199774551391601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,float16,0,5.198303985595703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,fp8,0,4.286603164672852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,2,128,1,fp8,fp8,0,4.313671875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,float16,0,5.4955089569091795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,fp8,0,4.229996871948242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,4,128,1,fp8,fp8,0,4.301303863525391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,float16,0,5.471393585205078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,float16,0,3.0680944442749025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,fp8,0,4.186921691894531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,64,8,128,1,fp8,fp8,0,4.262838363647461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,fp8,0,2.4593759536743165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,float16,0,2.5916032791137695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,64,128,1,fp8,fp8,0,2.602118492126465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,fp8,0,2.155897521972656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,1,128,1,fp8,fp8,0,2.089785575866699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,fp8,0,2.1667760848999023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,float16,0,2.8586063385009766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,float16,0,2.6172800064086914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,fp8,0,2.1038623809814454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,4,128,1,fp8,fp8,0,2.3671424865722654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,2,128,1,fp8,fp8,0,2.10406551361084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,float16,0,2.769607925415039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,fp8,0,2.1703840255737306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,64,8,128,1,fp8,fp8,0,2.1092351913452148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,fp8,0,1.399625587463379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,64,128,1,fp8,fp8,0,1.2469599723815918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,fp8,0,1.0774991989135743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,float16,0,1.290775966644287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,1,128,1,fp8,fp8,0,1.082590389251709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,float16,0,1.2768256187438964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,fp8,0,1.257844829559326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,2,128,1,fp8,fp8,0,1.0778608322143555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,float16,0,1.491335964202881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,float16,0,1.3222831726074218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,4,128,1,fp8,fp8,0,1.1037775993347168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,float16,0,1.2934687614440918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,8,128,1,fp8,fp8,0,1.092420768737793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,fp8,0,1.192632007598877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,fp8,0,0.6382544040679932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,float16,0,0.7771488189697265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,64,128,1,fp8,fp8,0,0.7462880134582519
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,fp8,0,0.5611167907714844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,float16,0,0.6831520080566407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,1,128,1,fp8,fp8,0,0.6740928173065186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,float16,0,0.6606063842773438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,fp8,0,0.5856495857238769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,2,128,1,fp8,fp8,0,0.5584335803985596
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,fp8,0,1.1098496437072753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,fp8,0,0.5814256191253662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,float16,0,0.6660336017608642
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,4,128,1,fp8,fp8,0,0.5593520164489746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,float16,0,0.6767583847045898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,fp8,0,0.5627600193023682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,64,8,128,1,fp8,fp8,0,0.5699376106262207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,fp8,0,4.9327953338623045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,1,128,1,fp8,fp8,0,4.978715133666992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,float16,0,6.258995056152344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,float16,0,6.1208446502685545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,fp8,0,4.933540725708008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,2,128,1,fp8,fp8,0,5.068268966674805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,fp8,0,4.973948669433594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,float16,0,6.353828811645508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,4,128,1,fp8,fp8,0,4.946905517578125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,fp8,0,4.925431823730468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,float16,0,6.78704605102539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,64,8,128,1,fp8,fp8,0,5.041044616699219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,float16,0,3.620809555053711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,fp8,0,2.8796335220336915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,64,128,1,fp8,fp8,0,3.0862831115722655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,float16,0,3.0385791778564455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,fp8,0,2.468924713134766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,1,128,1,fp8,fp8,0,2.4671663284301757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,fp8,0,2.800984001159668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,float16,0,3.1349327087402346
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,2,128,1,fp8,fp8,0,2.506780815124512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,float16,0,3.0668960571289063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,fp8,0,2.4796239852905275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,4,128,1,fp8,fp8,0,2.633358383178711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,float16,0,3.188822364807129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,fp8,0,2.7336496353149413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,64,8,128,1,fp8,fp8,0,2.482057571411133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,float16,0,1.9925472259521484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,fp8,0,1.4513983726501465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,64,128,1,fp8,fp8,0,1.4641087532043457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,float16,0,1.5928879737854005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,fp8,0,1.2428751945495606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,1,128,1,fp8,fp8,0,1.2476335525512696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,float16,0,1.6211103439331054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,fp8,0,1.2450960159301758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,2,128,1,fp8,fp8,0,1.2617088317871095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,float16,0,1.467841625213623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,4,128,1,fp8,fp8,0,1.244643211364746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,fp8,0,1.5124367713928222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,float16,0,1.555089569091797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,float16,0,0.9339216232299805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,fp8,0,1.3746303558349608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,64,8,128,1,fp8,fp8,0,1.2473872184753418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,fp8,0,0.8033727645874024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,64,128,1,fp8,fp8,0,0.7801663875579834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,float16,0,0.7713312149047852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,fp8,0,0.6412447929382324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,1,128,1,fp8,fp8,0,0.6647200107574462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,fp8,0,0.6430431842803955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,2,128,1,fp8,fp8,0,0.6418144226074218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,float16,0,0.7821199893951416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,fp8,0,0.6447167873382569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,4,128,1,fp8,fp8,0,0.6429776191711426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,fp8,0,0.6429456233978271
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,float16,0,0.7961872100830079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,fp8,0,0.42868480682373045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,float16,0,0.48664321899414065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,64,128,1,fp8,fp8,0,0.42968158721923827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,fp8,0,0.3732192039489746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,1,128,1,fp8,fp8,0,0.3421792030334473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,float16,0,0.7643743991851807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,float16,0,0.40671358108520506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,fp8,0,0.34195520877838137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,2,128,1,fp8,fp8,0,0.34215359687805175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,float16,0,0.40105280876159666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,fp8,0,0.34257919788360597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,4,128,1,fp8,fp8,0,0.3413647890090942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,64,8,128,1,fp8,fp8,0,0.6436848163604736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,float16,0,0.4154367923736572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,fp8,0,0.3423135995864868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,8,128,1,fp8,fp8,0,0.34198238849639895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,float16,0,0.39297919273376464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,fp8,0,4.746739196777344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,1,128,1,fp8,fp8,0,4.7331184387207035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,float16,0,6.0876625061035154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,float16,0,5.771539306640625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,fp8,0,4.729004669189453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,2,128,1,fp8,fp8,0,4.746187210083008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,fp8,0,4.824248123168945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,float16,0,6.190934371948242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,4,128,1,fp8,fp8,0,4.741113662719727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,float16,0,6.04649772644043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,fp8,0,4.754632186889649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,64,8,128,1,fp8,fp8,0,4.8633583068847654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,float16,0,3.68392333984375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,fp8,0,3.06823673248291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,64,128,1,fp8,fp8,0,2.9290960311889647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,float16,0,2.8790159225463867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,fp8,0,2.5364496231079103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,1,128,1,fp8,fp8,0,2.371089553833008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,float16,0,2.7388479232788088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,fp8,0,2.422427177429199
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,2,128,1,fp8,fp8,0,2.3853567123413084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,float16,0,2.7857616424560545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,fp8,0,2.4882623672485353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,4,128,1,fp8,fp8,0,2.4309215545654297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,float16,0,3.010985565185547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,fp8,0,2.86682071685791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,64,8,128,1,fp8,fp8,0,2.3815776824951174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,fp8,0,1.6840991973876953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,float16,0,1.3859264373779296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,64,128,1,fp8,fp8,0,1.4815919876098633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,fp8,0,1.2068063735961914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,1,128,1,fp8,fp8,0,1.2841360092163085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,float16,0,1.388651180267334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,fp8,0,1.201030445098877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,float16,0,1.8766815185546875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,2,128,1,fp8,fp8,0,1.2465184211730957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,float16,0,1.4327839851379394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,fp8,0,1.2022704124450683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,4,128,1,fp8,fp8,0,1.205782413482666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,float16,0,1.4269871711730957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,float16,0,0.9415760040283203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,fp8,0,1.3065343856811524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,64,8,128,1,fp8,fp8,0,1.2053152084350587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,fp8,0,0.7550447940826416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,fp8,0,0.6150735855102539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,64,128,1,fp8,fp8,0,0.7566912174224854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,float16,0,0.7070000171661377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,1,128,1,fp8,fp8,0,0.6681888103485107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,fp8,0,0.6180304050445556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,float16,0,0.7172255992889405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,2,128,1,fp8,fp8,0,0.6158112049102783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,float16,0,0.7248271942138672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,fp8,0,0.6183184146881103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,4,128,1,fp8,fp8,0,0.6170015811920166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,fp8,0,0.6190720081329346
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,8,128,1,fp8,fp8,0,0.6184383869171143
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,float16,0,0.7338560104370118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,fp8,0,0.39347999095916747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,float16,0,0.4878687858581543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,fp8,0,0.32410080432891847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,1,128,1,fp8,fp8,0,0.32395360469818113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,float16,0,0.37747039794921877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,fp8,0,0.3239311933517456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,2,128,1,fp8,fp8,0,0.3237616062164307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,float16,0,0.3803807973861694
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,fp8,0,0.32364640235900877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,64,128,1,fp8,fp8,0,0.4092895984649658
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,float16,0,0.3731136083602905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,float16,0,0.3774415969848633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,fp8,0,0.32435040473937987
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,8,128,1,fp8,fp8,0,0.324070405960083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,fp8,0,0.21321280002593995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,float16,0,0.19844640493392945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,64,128,1,fp8,fp8,0,0.21325440406799318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,fp8,0,0.17448960542678832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,1,128,1,fp8,fp8,0,0.17474559545516968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,float16,0,0.1992192029953003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,fp8,0,0.17573280334472657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,2,128,1,fp8,fp8,0,0.17549120187759398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,fp8,0,0.1755743980407715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,4,128,1,fp8,fp8,0,0.17531360387802125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,float16,0,0.20434720516204835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,64,4,128,1,fp8,fp8,0,0.32364959716796876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,8,128,1,fp8,fp8,0,0.1763327956199646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,float16,0,0.26000640392303465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,float16,0,0.20256481170654297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,fp8,0,0.17578400373458863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,float16,0,3.472911834716797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,fp8,0,2.8777439117431642
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,1,128,1,fp8,fp8,0,2.875796890258789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,float16,0,3.315715026855469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,fp8,0,2.926304054260254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,2,128,1,fp8,fp8,0,2.890012741088867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,float16,0,3.3396896362304687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,4,128,1,fp8,fp8,0,2.9247568130493162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,fp8,0,2.968671989440918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,float16,0,3.5284767150878906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,fp8,0,3.0122127532958984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,float16,0,2.243062400817871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,fp8,0,1.9260400772094726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,64,8,128,1,fp8,fp8,0,2.906260871887207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,float16,0,1.673846435546875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,64,128,1,fp8,fp8,0,1.847153663635254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,fp8,0,1.4547616004943849
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,fp8,0,1.4497920036315919
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,float16,0,1.6823200225830077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,2,128,1,fp8,fp8,0,1.4535856246948242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,fp8,0,1.570035171508789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,float16,0,1.676171112060547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,1,128,1,fp8,fp8,0,1.4535632133483887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,fp8,0,1.5940208435058594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,float16,0,1.7026336669921875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,8,128,1,fp8,fp8,0,1.458456039428711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,fp8,0,1.0308128356933595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,float16,0,1.1383935928344726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,64,128,1,fp8,fp8,0,0.9401535987854004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,64,4,128,1,fp8,fp8,0,1.4545791625976563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,fp8,0,0.7642767906188965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,1,128,1,fp8,fp8,0,0.7389023780822754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,float16,0,0.8387056350708008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,fp8,0,0.7508048057556153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,2,128,1,fp8,fp8,0,0.7404895782470703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,float16,0,0.854916763305664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,fp8,0,0.7387440204620361
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,4,128,1,fp8,fp8,0,0.7473087787628174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,fp8,0,0.7407951831817627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,float16,0,0.8820976257324219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,8,128,1,fp8,fp8,0,0.7429855823516845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,fp8,0,0.5106031894683838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,64,128,1,fp8,fp8,0,0.48407678604125975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,float16,0,0.8559200286865234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,fp8,0,0.38226239681243895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,float16,0,0.437883186340332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,1,128,1,fp8,fp8,0,0.38121120929718016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,fp8,0,0.3917327880859375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,2,128,1,fp8,fp8,0,0.38127999305725097
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,float16,0,0.4398191928863525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,fp8,0,0.3813055992126465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,4,128,1,fp8,fp8,0,0.3892080068588257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,float16,0,0.4448751926422119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,fp8,0,0.3836575984954834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,8,128,1,fp8,fp8,0,0.38307039737701415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,float16,0,0.30800800323486327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,fp8,0,0.2549504041671753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,64,128,1,fp8,fp8,0,0.2550096035003662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,float16,0,0.2272495985031128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,fp8,0,0.2037519931793213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,1,128,1,fp8,fp8,0,0.20300960540771484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,float16,0,0.23004000186920165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,fp8,0,0.20298559665679933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,2,128,1,fp8,fp8,0,0.20387520790100097
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,float16,0,0.22971200942993164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,float16,0,0.4391119956970215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,fp8,0,0.20331039428710937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,4,128,1,fp8,fp8,0,0.20468640327453613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,float16,0,0.23434081077575683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,fp8,0,0.20456159114837646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,64,8,128,1,fp8,fp8,0,0.20327680110931395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,float16,0,0.16931999921798707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,64,128,1,fp8,fp8,0,0.14153120517730713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,float16,0,0.12690880298614501
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,fp8,0,0.1130079984664917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,1,128,1,fp8,fp8,0,0.11304479837417603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,float16,0,0.12722400426864625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,fp8,0,0.1130687952041626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,float16,0,0.585478401184082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,2,128,1,fp8,fp8,0,0.11309119462966918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,float16,0,0.12671200037002564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,fp8,0,0.11315840482711792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,4,128,1,fp8,fp8,0,0.11307200193405151
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,float16,0,0.1326624035835266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,fp8,0,0.11313439607620239
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,8,128,1,fp8,fp8,0,0.11427520513534546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,fp8,0,0.14059040546417237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,fp8,0,2.943796730041504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,float16,0,3.3208656311035156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,1,128,1,fp8,fp8,0,2.9344831466674806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,fp8,0,2.9394655227661133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,2,128,1,fp8,fp8,0,3.023382377624512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,float16,0,3.321424102783203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,fp8,0,2.947849655151367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,4,128,1,fp8,fp8,0,2.9386415481567383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,float16,0,3.3472927093505858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,float16,0,3.465059280395508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,float16,0,2.3981279373168944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,fp8,0,2.951326370239258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,64,8,128,1,fp8,fp8,0,2.9488576889038085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,fp8,0,2.098255920410156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,float16,0,1.6816911697387695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,fp8,0,1.478321647644043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,64,128,1,fp8,fp8,0,2.0135776519775392
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,1,128,1,fp8,fp8,0,1.4777423858642578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,fp8,0,1.478223991394043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,float16,0,1.7601184844970703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,2,128,1,fp8,fp8,0,1.4797792434692383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,float16,0,1.6677824020385743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,fp8,0,1.4793392181396485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,4,128,1,fp8,fp8,0,1.6536575317382813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,float16,0,1.724990463256836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,fp8,0,1.520035171508789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,float16,0,1.2218799591064453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,fp8,0,1.0158656120300293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,64,8,128,1,fp8,fp8,0,1.4838128089904785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,64,128,1,fp8,fp8,0,1.0112480163574218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,fp8,0,0.7481616020202637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,1,128,1,fp8,fp8,0,0.7490272045135498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,float16,0,0.8434240341186523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,float16,0,0.8267968177795411
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,fp8,0,0.7498528003692627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,2,128,1,fp8,fp8,0,0.7495327949523926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,float16,0,0.876427173614502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,fp8,0,0.7490911960601807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,4,128,1,fp8,fp8,0,0.7507391929626465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,float16,0,0.8700431823730469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,fp8,0,0.8082400321960449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,float16,0,0.6168943881988526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,64,8,128,1,fp8,fp8,0,0.7543200016021728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,fp8,0,0.5164495944976807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,64,128,1,fp8,fp8,0,0.5514880180358886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,float16,0,0.42630720138549805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,fp8,0,0.3843456029891968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,1,128,1,fp8,fp8,0,0.3849744081497192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,float16,0,0.44304637908935546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,fp8,0,0.384934401512146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,2,128,1,fp8,fp8,0,0.3845776081085205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,float16,0,0.4300191879272461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,fp8,0,0.3858432054519653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,4,128,1,fp8,fp8,0,0.38569600582122804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,float16,0,0.44307680130004884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,8,128,1,fp8,fp8,0,0.386897611618042
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,float16,0,0.32028961181640625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,64,128,1,fp8,fp8,0,0.27038719654083254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,float16,0,0.2232111930847168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,fp8,0,0.20300641059875488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,1,128,1,fp8,fp8,0,0.20259039402008056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,float16,0,0.22734239101409912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,fp8,0,0.20288639068603515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,2,128,1,fp8,fp8,0,0.20317440032958983
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,float16,0,0.2272144079208374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,fp8,0,0.20327200889587402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,4,128,1,fp8,fp8,0,0.20303680896759033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,float16,0,0.2367919921875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,fp8,0,0.2029792070388794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,8,128,1,fp8,fp8,0,0.20378880500793456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,float16,0,0.17239199876785277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,fp8,0,0.14615520238876342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,64,128,1,fp8,fp8,0,0.14525120258331298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,float16,0,0.12378239631652832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,fp8,0,0.1096127986907959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,1,128,1,fp8,fp8,0,0.11063840389251708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,float16,0,0.1217952013015747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,fp8,0,0.11078079938888549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,fp8,0,0.3865231990814209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,float16,0,0.12302080392837525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,fp8,0,0.26934399604797366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,fp8,0,0.11074559688568116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,4,128,1,fp8,fp8,0,0.10993759632110596
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,float16,0,0.1256991982460022
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,fp8,0,0.11065759658813476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,8,128,1,fp8,fp8,0,0.11102720499038696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,float16,0,0.09856160283088684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,fp8,0,0.0818943977355957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,64,128,1,fp8,fp8,0,0.0821183979511261
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,float16,0,0.06997600197792053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,fp8,0,0.064300799369812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,1,128,1,fp8,fp8,0,0.06497439742088318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,float16,0,0.06985440254211425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,fp8,0,0.06521440148353577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,2,128,1,fp8,fp8,0,0.06388480067253113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,float16,0,0.0715120017528534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,fp8,0,0.06386880278587341
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,4,128,1,fp8,fp8,0,0.06467679738998414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,float16,0,0.0717519998550415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,fp8,0,0.06435199975967407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,64,2,128,1,fp8,fp8,0,0.11078879833221436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,float16,0,2.015648078918457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,fp8,0,1.8741792678833007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,64,8,128,1,fp8,fp8,0,0.06407039761543273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,1,128,1,fp8,fp8,0,1.9866928100585937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,float16,0,2.040852737426758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,fp8,0,1.8756864547729493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,2,128,1,fp8,fp8,0,1.8703727722167969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,float16,0,2.0706207275390627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,fp8,0,1.8736112594604493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,4,128,1,fp8,fp8,0,1.8774944305419923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,float16,0,2.166024017333984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,fp8,0,1.952115249633789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,float16,0,1.588923168182373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,64,8,128,1,fp8,fp8,0,1.8796512603759765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,fp8,0,1.399014377593994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,64,128,1,fp8,fp8,0,1.3299488067626952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,float16,0,1.0456640243530273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,fp8,0,0.944542407989502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,1,128,1,fp8,fp8,0,0.9565312385559082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,float16,0,1.0222432136535644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,fp8,0,0.968569564819336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,2,128,1,fp8,fp8,0,0.9451503753662109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,float16,0,1.0398655891418458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,fp8,0,0.9474191665649414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,4,128,1,fp8,fp8,0,0.9478960037231445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,float16,0,1.0881711959838867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,fp8,0,0.9482383728027344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,64,8,128,1,fp8,fp8,0,0.9485360145568847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,fp8,0,0.6781839847564697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,64,128,1,fp8,fp8,0,0.7088719844818115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,float16,0,0.5183887958526612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,fp8,0,0.4803008079528809
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,1,128,1,fp8,fp8,0,0.4806047916412354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,float16,0,0.5218976020812989
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,fp8,0,0.4807231903076172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,2,128,1,fp8,fp8,0,0.4801648139953613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,float16,0,0.5349440097808837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,fp8,0,0.48157601356506347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,4,128,1,fp8,fp8,0,0.4812687873840332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,float16,0,0.5547647953033448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,8,128,1,fp8,fp8,0,0.4830783843994141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,float16,0,0.4073535919189453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,fp8,0,0.3466383934020996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,float16,0,0.804748821258545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,64,128,1,fp8,fp8,0,0.34805281162261964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,fp8,0,0.24971039295196534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,1,128,1,fp8,fp8,0,0.24960639476776122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,float16,0,0.2749696016311646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,2,128,1,fp8,fp8,0,0.2500607967376709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,float16,0,0.27946879863739016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,fp8,0,0.2500799894332886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,4,128,1,fp8,fp8,0,0.2504863977432251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,float16,0,0.28941919803619387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,fp8,0,0.4838655948638916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,fp8,0,0.2506623983383179
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,8,128,1,fp8,fp8,0,0.2500672101974487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,float16,0,0.21604321002960206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,fp8,0,0.1826464056968689
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,64,128,1,fp8,fp8,0,0.1841760039329529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,float16,0,0.14417120218276977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,fp8,0,0.13348000049591063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,1,128,1,fp8,fp8,0,0.1335088014602661
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,float16,0,0.14689919948577881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,fp8,0,0.13352479934692382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,2,128,1,fp8,fp8,0,0.13344960212707518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,fp8,0,0.24889760017395018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,float16,0,0.14697760343551636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,fp8,0,0.13436319828033447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,4,128,1,fp8,fp8,0,0.13335520029067993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,float16,0,0.15154080390930175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,fp8,0,0.13435200452804566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,64,8,128,1,fp8,fp8,0,0.1336575984954834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,float16,0,0.11645760536193847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,fp8,0,0.1007904052734375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,64,128,1,fp8,fp8,0,0.10055040121078491
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,float16,0,0.08008800148963928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,fp8,0,0.07416960000991821
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,1,128,1,fp8,fp8,0,0.07409600019454957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,float16,0,0.08173760175704955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,fp8,0,0.07404320240020752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,2,128,1,fp8,fp8,0,0.07450079917907715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,float16,0,0.08214719891548157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,fp8,0,0.07399839758872986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,4,128,1,fp8,fp8,0,0.0739471971988678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,float16,0,0.08505920171737671
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,fp8,0,0.07426880002021789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,64,8,128,1,fp8,fp8,0,0.07397599816322327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,float16,0,0.06785920262336731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,fp8,0,0.05759040117263794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,64,128,1,fp8,fp8,0,0.05766400098800659
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,float16,0,0.04982239902019501
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,fp8,0,0.0455375999212265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,1,128,1,fp8,fp8,0,0.045531201362609866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,float16,0,0.049404799938201904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,fp8,0,0.04554559886455536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,2,128,1,fp8,fp8,0,0.0454479992389679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,float16,0,0.05023199915885925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,fp8,0,0.04531840085983276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,float16,0,0.26785600185394287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,float16,0,0.050993597507476805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,fp8,0,0.045238399505615236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,8,128,1,fp8,fp8,0,0.045326399803161624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,float16,0,2.133678436279297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,fp8,0,2.0377391815185546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,64,4,128,1,fp8,fp8,0,0.04589119851589203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,1,128,1,fp8,fp8,0,2.041275215148926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,float16,0,2.1501903533935547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,fp8,0,2.042441558837891
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,2,128,1,fp8,fp8,0,2.038870429992676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,float16,0,2.200934410095215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,fp8,0,2.0411792755126954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,4,128,1,fp8,fp8,0,2.045022392272949
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,float16,0,2.345721626281738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,fp8,0,2.0519872665405274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,float16,0,1.7992591857910156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,fp8,0,1.5456080436706543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,64,128,1,fp8,fp8,0,1.5290528297424317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,float16,0,1.0980863571166992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,fp8,0,1.026972770690918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,1,128,1,fp8,fp8,0,1.0258159637451172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,float16,0,1.1025775909423827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,fp8,0,1.0271391868591309
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,64,8,128,1,fp8,fp8,0,2.047235107421875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,2,128,1,fp8,fp8,0,1.0265263557434081
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,float16,0,1.1370800018310547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,fp8,0,1.0285120010375977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,4,128,1,fp8,fp8,0,1.0267600059509276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,float16,0,1.162936019897461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,fp8,0,1.037996768951416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,64,8,128,1,fp8,fp8,0,1.0323087692260742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,fp8,0,0.8037887573242187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,float16,0,0.550377607345581
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,64,128,1,fp8,fp8,0,0.7729023933410645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,1,128,1,fp8,fp8,0,0.5207263946533203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,fp8,0,0.5398640155792236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,float16,0,0.5584415912628173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,fp8,0,0.5205679893493652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,2,128,1,fp8,fp8,0,0.5202303886413574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,float16,0,0.5717567920684814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,fp8,0,0.5215104103088379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,4,128,1,fp8,fp8,0,0.5210927963256836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,float16,0,0.5966911792755127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,fp8,0,0.5235455989837646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,8,128,1,fp8,fp8,0,0.5228303909301758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,float16,0,0.4623551845550537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,float16,0,0.8984416007995606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,float16,0,0.28669118881225586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,fp8,0,0.394321608543396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,64,128,1,fp8,fp8,0,0.3938352108001709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,fp8,0,0.26719040870666505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,1,128,1,fp8,fp8,0,0.26824159622192384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,float16,0,0.2851727962493896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,fp8,0,0.2677135944366455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,2,128,1,fp8,fp8,0,0.2671679973602295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,float16,0,0.29499680995941163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,fp8,0,0.2685904026031494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,4,128,1,fp8,fp8,0,0.26835999488830564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,float16,0,0.3029952049255371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,fp8,0,0.26879520416259767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,64,8,128,1,fp8,fp8,0,0.2690975904464722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,float16,0,0.23820478916168214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,fp8,0,0.20492160320281982
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,float16,0,0.1508080005645752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,fp8,0,0.141428804397583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,1,128,1,fp8,fp8,0,0.1412287950515747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,float16,0,0.15077760219573974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,fp8,0,0.14122240543365477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,2,128,1,fp8,fp8,0,0.14158560037612916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,float16,0,0.1537328004837036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,fp8,0,0.14146080017089843
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,4,128,1,fp8,fp8,0,0.14186079502105714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,float16,0,0.16096479892730714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,fp8,0,0.14148800373077391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,8,128,1,fp8,fp8,0,0.14189599752426146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,float16,0,0.12853120565414428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,fp8,0,0.11064480543136597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,64,128,1,fp8,fp8,0,0.1107632040977478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,fp8,0,0.07663519978523255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,1,128,1,fp8,fp8,0,0.07704480290412903
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,float16,0,0.08406720161437989
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,fp8,0,0.07770079970359803
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,2,128,1,fp8,fp8,0,0.0765936017036438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,float16,0,0.08470720052719116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,fp8,0,0.07672799825668335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,4,128,1,fp8,fp8,0,0.07634720206260681
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,float16,0,0.0875536024570465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,fp8,0,0.07789919972419738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,8,128,1,fp8,fp8,0,0.07780320048332215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,64,64,128,1,fp8,fp8,0,0.20562880039215087
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,float16,0,0.07390559911727905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,fp8,0,0.06199359893798828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,float16,0,0.04929440021514893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,fp8,0,0.04524320065975189
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,1,128,1,fp8,fp8,0,0.0453792005777359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,float16,0,0.04936319887638092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,fp8,0,0.04523360133171082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,float16,0,0.0834559977054596
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,2,128,1,fp8,fp8,0,0.045491200685501096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,float16,0,0.049344000220298764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,fp8,0,0.04530400037765503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,4,128,1,fp8,fp8,0,0.045270401239395144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,float16,0,0.05126399993896484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,fp8,0,0.04522719979286194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,8,128,1,fp8,fp8,0,0.04539999961853027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,float16,0,0.04114400148391724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,fp8,0,0.037195199728012086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,64,128,1,fp8,fp8,0,0.0369951993227005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,float16,0,0.032971200346946714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,fp8,0,0.03038719892501831
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,float16,0,0.03296479880809784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,fp8,0,0.030729600787162782
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,2,128,1,fp8,fp8,0,0.030665600299835206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,float16,0,0.03296639919281006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,fp8,0,0.030323201417922975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,4,128,1,fp8,fp8,0,0.030036801099777223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,float16,0,0.032948800921440126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,fp8,0,0.029971200227737426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,8,128,1,fp8,fp8,0,0.02978079915046692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,64,64,128,1,fp8,fp8,0,0.06197919845581055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,1,128,1,float16,float16,0,1.6432640075683593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,1,128,1,float16,fp8,0,1.5795248031616211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,1,128,1,fp8,fp8,0,1.5802687644958495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,2,128,1,float16,float16,0,1.649617576599121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,2,128,1,float16,fp8,0,1.5791423797607422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,2,128,1,fp8,fp8,0,1.5792367935180665
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,4,128,1,float16,float16,0,1.6977888107299806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,4,128,1,float16,fp8,0,1.5864975929260254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,4,128,1,fp8,fp8,0,1.580414390563965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,8,128,1,float16,fp8,0,1.5835519790649415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,8,128,1,fp8,fp8,0,1.5828368186950683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,64,128,1,float16,fp8,0,1.27806396484375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,64,1,128,1,fp8,fp8,0,0.03001280128955841
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,1,128,1,float16,float16,0,0.8289600372314453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,64,128,1,fp8,fp8,0,1.2782400131225586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,64,8,128,1,float16,float16,0,1.7790159225463866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,1,128,1,float16,fp8,0,0.7983376026153565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,1,128,1,fp8,fp8,0,0.796950387954712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,2,128,1,float16,float16,0,0.8311823844909668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,2,128,1,float16,fp8,0,0.7969647884368897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,2,128,1,fp8,fp8,0,0.7964672088623047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,64,128,1,float16,float16,0,1.4745264053344727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,4,128,1,float16,float16,0,0.8536704063415528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,4,128,1,float16,fp8,0,0.7975376129150391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,4,128,1,fp8,fp8,0,0.7963312149047852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,8,128,1,float16,float16,0,0.8912752151489258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,8,128,1,float16,fp8,0,0.7991903781890869
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,64,8,128,1,fp8,fp8,0,0.7996255874633789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,64,128,1,float16,float16,0,0.7451695919036865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,64,128,1,float16,fp8,0,0.6453407764434814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,1,128,1,float16,float16,0,0.42269120216369627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,1,128,1,float16,fp8,0,0.4040031909942627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,1,128,1,fp8,fp8,0,0.40442399978637694
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,2,128,1,float16,float16,0,0.42504000663757324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,2,128,1,float16,fp8,0,0.40417919158935545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,2,128,1,fp8,fp8,0,0.4038191795349121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,4,128,1,float16,float16,0,0.43546237945556643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,4,128,1,float16,fp8,0,0.4044832229614258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,4,128,1,fp8,fp8,0,0.4049536228179932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,8,128,1,float16,float16,0,0.45632481575012207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,8,128,1,float16,fp8,0,0.40561280250549314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,8,128,1,fp8,fp8,0,0.4057199954986572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,64,128,1,float16,float16,0,0.38192000389099123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,64,128,1,float16,fp8,0,0.329966402053833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,64,128,1,fp8,fp8,0,0.3297679901123047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,1,128,1,float16,fp8,0,0.20877280235290527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,1,128,1,fp8,fp8,0,0.20878241062164307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,2,128,1,float16,float16,0,0.21792960166931152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,2,128,1,float16,fp8,0,0.20853118896484374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,2,128,1,fp8,fp8,0,0.20867519378662108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,4,128,1,float16,float16,0,0.22358880043029786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,64,64,128,1,fp8,fp8,0,0.6462751865386963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,4,128,1,float16,fp8,0,0.20881760120391846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,4,128,1,fp8,fp8,0,0.20874559879302979
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,8,128,1,float16,float16,0,0.2342384099960327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,8,128,1,float16,fp8,0,0.2092128038406372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,64,128,1,float16,fp8,0,0.17238240242004393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,1,128,1,float16,float16,0,0.21897439956665038
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,64,128,1,fp8,fp8,0,0.17253600358963012
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,1,128,1,float16,float16,0,0.11675039529800416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,1,128,1,float16,fp8,0,0.11102240085601807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,1,128,1,fp8,fp8,0,0.11092480421066284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,2,128,1,float16,float16,0,0.11709760427474976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,2,128,1,float16,fp8,0,0.11064159870147705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,64,128,1,float16,float16,0,0.19819200038909912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,2,128,1,fp8,fp8,0,0.1107408046722412
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,4,128,1,float16,float16,0,0.11892160177230834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,4,128,1,fp8,fp8,0,0.11044960021972657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,8,128,1,float16,float16,0,0.12479039430618286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,8,128,1,float16,fp8,0,0.11088320016860961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,8,128,1,fp8,fp8,0,0.11086560487747192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,64,128,1,float16,float16,0,0.1070255994796753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,64,128,1,float16,fp8,0,0.09262080192565918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,64,128,1,fp8,fp8,0,0.09259840250015258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,1,128,1,float16,float16,0,0.06561279892921448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,1,128,1,float16,fp8,0,0.05958880186080932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,1,128,1,fp8,fp8,0,0.05983039736747742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,2,128,1,float16,float16,0,0.06551520228385925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,2,128,1,float16,fp8,0,0.05994079709053039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,4,128,1,float16,float16,0,0.06592159867286682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,4,128,1,float16,fp8,0,0.06106079816818237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,4,128,1,fp8,fp8,0,0.06083040237426758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,8,128,1,float16,float16,0,0.0701088011264801
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,8,128,1,float16,fp8,0,0.061286401748657224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,64,4,128,1,float16,fp8,0,0.11063040494918823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,8,128,1,fp8,fp8,0,0.060812801122665405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,64,128,1,float16,fp8,0,0.05168480277061462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,64,128,1,fp8,fp8,0,0.05164480209350586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,1,128,1,float16,float16,0,0.037478399276733396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,1,128,1,float16,fp8,0,0.03508319854736328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,1,128,1,fp8,fp8,0,0.03512159883975983
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,2,128,1,float16,float16,0,0.03751200139522552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,2,128,1,float16,fp8,0,0.03517920076847077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,2,128,1,fp8,fp8,0,0.0353983998298645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,4,128,1,float16,float16,0,0.03874399960041046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,4,128,1,float16,fp8,0,0.03515360057353974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,4,128,1,fp8,fp8,0,0.03519040048122406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,64,2,128,1,fp8,fp8,0,0.059665602445602414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,8,128,1,float16,float16,0,0.03925440013408661
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,8,128,1,float16,fp8,0,0.03510560095310211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,64,128,1,float16,float16,0,0.0328000009059906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,64,128,1,float16,fp8,0,0.030926400423049928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,64,128,1,fp8,fp8,0,0.030883198976516722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,1,128,1,float16,float16,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,1,128,1,float16,fp8,0,0.02269279956817627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,64,8,128,1,fp8,fp8,0,0.2090480089187622
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,1,128,1,fp8,fp8,0,0.023056000471115112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,2,128,1,float16,float16,0,0.024931199848651886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,2,128,1,float16,fp8,0,0.023379200696945192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,2,128,1,fp8,fp8,0,0.02290239930152893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,4,128,1,float16,float16,0,0.024908800423145295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,64,128,1,float16,float16,0,0.060043197870254514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,4,128,1,fp8,fp8,0,0.023124800622463228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,8,128,1,float16,float16,0,0.026630398631095887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,8,128,1,float16,fp8,0,0.023472000658512116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,8,128,1,fp8,fp8,0,0.02306240051984787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,64,128,1,float16,float16,0,0.01950719952583313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,64,128,1,float16,fp8,0,0.019963200390338897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,64,128,1,fp8,fp8,0,0.020524799823760986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,1,128,1,float16,float16,0,0.016763199865818024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,1,128,1,float16,fp8,0,0.01648160070180893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,1,128,1,fp8,fp8,0,0.016505600512027742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,2,128,1,float16,float16,0,0.01658719927072525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,2,128,1,float16,fp8,0,0.016516800224781036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,2,128,1,fp8,fp8,0,0.01653439998626709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,4,128,1,float16,float16,0,0.016607999801635742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,4,128,1,float16,fp8,0,0.016547200083732606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,4,128,1,fp8,fp8,0,0.01656160056591034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,8,128,1,float16,float16,0,0.016574400663375854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,8,128,1,float16,fp8,0,0.01647839993238449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,64,8,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,1,128,1,float16,float16,0,0.6794991970062256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,1,128,1,float16,fp8,0,0.6675360202789307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,64,8,128,1,fp8,fp8,0,0.03527520000934601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,1,128,1,fp8,fp8,0,0.6677248001098632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,2,128,1,float16,float16,0,0.6818111896514892
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,2,128,1,float16,fp8,0,0.6655727863311768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,64,4,128,1,float16,fp8,0,0.022811199724674224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,2,128,1,fp8,fp8,0,0.6665328025817872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,4,128,1,float16,float16,0,0.7012656211853028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,4,128,1,float16,fp8,0,0.6652224063873291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,4,128,1,fp8,fp8,0,0.6647903919219971
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,8,128,1,float16,float16,0,0.7410607814788819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,8,128,1,fp8,fp8,0,0.6651360034942627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,64,128,1,float16,float16,0,0.6705999851226807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,64,128,1,float16,fp8,0,0.5719583988189697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,1,128,1,float16,float16,0,0.345249605178833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,1,128,1,float16,fp8,0,0.3384959936141968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,64,128,1,fp8,fp8,0,0.5710000038146973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,1,128,1,fp8,fp8,0,0.3382400035858154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,2,128,1,float16,float16,0,0.34710240364074707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,2,128,1,float16,fp8,0,0.3378607988357544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,2,128,1,fp8,fp8,0,0.33821120262146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,4,128,1,float16,float16,0,0.35664639472961424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,4,128,1,float16,fp8,0,0.3379584074020386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,4,128,1,fp8,fp8,0,0.3384848117828369
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,8,128,1,float16,float16,0,0.3772272109985352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,8,128,1,fp8,fp8,0,0.33810720443725584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,64,128,1,float16,float16,0,0.3411151885986328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,64,128,1,float16,fp8,0,0.29101760387420655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,64,128,1,fp8,fp8,0,0.29108800888061526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,1,128,1,float16,float16,0,0.17867679595947267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,1,128,1,float16,fp8,0,0.17417759895324708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,1,128,1,fp8,fp8,0,0.17403680086135864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,2,128,1,float16,float16,0,0.17917280197143554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,2,128,1,float16,fp8,0,0.17424479722976685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,64,8,128,1,float16,fp8,0,0.3379024028778076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,2,128,1,fp8,fp8,0,0.17427680492401124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,4,128,1,float16,float16,0,0.1842095971107483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,4,128,1,float16,fp8,0,0.17433120012283326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,4,128,1,fp8,fp8,0,0.1742303967475891
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,8,128,1,float16,float16,0,0.19498080015182495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,64,8,128,1,float16,fp8,0,0.6660143852233886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,8,128,1,float16,fp8,0,0.17435359954833984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,64,8,128,1,fp8,fp8,0,0.17443039417266845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,64,128,1,float16,float16,0,0.1780832052230835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,64,128,1,float16,fp8,0,0.15219999551773072
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,64,128,1,fp8,fp8,0,0.15217280387878418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,1,128,1,float16,float16,0,0.0965232014656067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,1,128,1,float16,fp8,0,0.09268479943275451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,1,128,1,fp8,fp8,0,0.0926367998123169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,2,128,1,float16,fp8,0,0.09268159866333008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,2,128,1,fp8,fp8,0,0.09376639723777772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,4,128,1,float16,float16,0,0.09964479804039002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,4,128,1,float16,fp8,0,0.0941760003566742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,4,128,1,fp8,fp8,0,0.09239839911460876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,8,128,1,float16,fp8,0,0.09265120029449463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,8,128,1,fp8,fp8,0,0.09294400215148926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,64,128,1,float16,float16,0,0.09591519832611084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,64,128,1,float16,fp8,0,0.0824832022190094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,1,128,1,float16,float16,0,0.05342400074005127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,64,128,1,fp8,fp8,0,0.0824288010597229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,1,128,1,float16,fp8,0,0.04936160147190094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,1,128,1,fp8,fp8,0,0.049728000164031984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,2,128,1,float16,float16,0,0.05386880040168762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,2,128,1,float16,fp8,0,0.05101760029792786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,2,128,1,fp8,fp8,0,0.049563199281692505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,4,128,1,float16,fp8,0,0.05071359872817993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,4,128,1,fp8,fp8,0,0.049623998999595645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,8,128,1,float16,float16,0,0.05754079818725586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,8,128,1,float16,fp8,0,0.04979040026664734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,8,128,1,fp8,fp8,0,0.0506816029548645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,64,128,1,float16,float16,0,0.05612959861755371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,64,128,1,float16,fp8,0,0.04734399914741516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,64,128,1,fp8,fp8,0,0.047353601455688475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,2,128,1,float16,float16,0,0.09676960110664368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,1,128,1,float16,float16,0,0.0323168009519577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,1,128,1,float16,fp8,0,0.030937600135803222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,1,128,1,fp8,fp8,0,0.030913600325584413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,2,128,1,float16,float16,0,0.03298720121383667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,2,128,1,float16,fp8,0,0.030902400612831116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,2,128,1,fp8,fp8,0,0.030931198596954347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,4,128,1,float16,float16,0,0.03300159871578216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,4,128,1,float16,fp8,0,0.030943998694419862
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,64,8,128,1,float16,float16,0,0.10460319519042968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,8,128,1,float16,float16,0,0.035097599029541016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,8,128,1,float16,fp8,0,0.03091840147972107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,8,128,1,fp8,fp8,0,0.030921599268913268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,64,128,1,float16,float16,0,0.028505599498748778
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,64,128,1,float16,fp8,0,0.0267984002828598
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,64,128,1,fp8,fp8,0,0.0269679993391037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,1,128,1,float16,float16,0,0.020644800364971162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,1,128,1,float16,fp8,0,0.02054399996995926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,2,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,2,128,1,float16,fp8,0,0.020553599298000335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,2,128,1,fp8,fp8,0,0.020644800364971162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,4,128,1,float16,float16,0,0.020849600434303284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,4,128,1,float16,fp8,0,0.020075200498104094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,64,4,128,1,float16,float16,0,0.0555728018283844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,4,128,1,fp8,fp8,0,0.020363199710845947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,8,128,1,float16,float16,0,0.021508799493312837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,8,128,1,float16,fp8,0,0.02008160054683685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,8,128,1,fp8,fp8,0,0.02066880017518997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,64,128,1,float16,float16,0,0.016607999801635742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,64,128,1,float16,fp8,0,0.01851679980754852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,64,128,1,fp8,fp8,0,0.018561600148677825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,1,128,1,float16,float16,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,1,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,1,128,1,fp8,fp8,0,0.01443839967250824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,2,128,1,float16,float16,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,2,128,1,float16,fp8,0,0.014430400729179383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,2,128,1,fp8,fp8,0,0.014537599682807923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,4,128,1,float16,float16,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,4,128,1,float16,fp8,0,0.014455999433994293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,8,128,1,float16,float16,0,0.01462399959564209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,8,128,1,float16,fp8,0,0.014475199580192565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,8,128,1,fp8,fp8,0,0.014451199769973755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,64,128,1,float16,float16,0,0.014727999269962311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,64,128,1,float16,fp8,0,0.014734399318695069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,64,128,1,fp8,fp8,0,0.014620800316333771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,64,4,128,1,fp8,fp8,0,0.030928000807762146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,1,128,1,float16,float16,0,0.0144896000623703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,1,128,1,float16,fp8,0,0.01252480000257492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,1,128,1,fp8,fp8,0,0.01252640038728714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,2,128,1,float16,float16,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,2,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,2,128,1,fp8,fp8,0,0.012438400089740754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,4,128,1,float16,float16,0,0.014508800208568573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,64,1,128,1,fp8,fp8,0,0.020559999346733093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,4,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,4,128,1,fp8,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,8,128,1,float16,float16,0,0.014395199716091156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,8,128,1,float16,fp8,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,64,8,128,1,fp8,fp8,0,0.01263359934091568
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,1,128,1,float16,float16,0,0.41566081047058107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,1,128,1,fp8,fp8,0,0.4102479934692383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,2,128,1,float16,float16,0,0.4163519859313965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,64,4,128,1,fp8,fp8,0,0.014422400295734406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,2,128,1,float16,fp8,0,0.4099103927612305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,2,128,1,fp8,fp8,0,0.41027679443359377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,4,128,1,float16,float16,0,0.4253407955169678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,4,128,1,float16,fp8,0,0.41016640663146975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,4,128,1,fp8,fp8,0,0.41000962257385254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,8,128,1,float16,float16,0,0.44469280242919923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,8,128,1,float16,fp8,0,0.40949277877807616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,1,128,1,float16,fp8,0,0.4098527908325195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,64,8,128,1,fp8,fp8,0,0.40999841690063477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,64,128,1,float16,float16,0,0.3628144025802612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,64,128,1,float16,fp8,0,0.32441120147705077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,1,128,1,float16,float16,0,0.21460320949554443
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,1,128,1,float16,fp8,0,0.2098383903503418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,1,128,1,fp8,fp8,0,0.21064000129699706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,2,128,1,float16,float16,0,0.21458559036254882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,2,128,1,float16,fp8,0,0.20943679809570312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,2,128,1,fp8,fp8,0,0.20935840606689454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,4,128,1,float16,float16,0,0.21939680576324463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,4,128,1,fp8,fp8,0,0.20963358879089355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,4,128,1,float16,fp8,0,0.20953760147094727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,8,128,1,float16,float16,0,0.2278304100036621
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,8,128,1,float16,fp8,0,0.20960960388183594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,8,128,1,fp8,fp8,0,0.20945439338684083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,64,128,1,float16,float16,0,0.18664000034332276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,64,128,1,float16,fp8,0,0.16826080083847045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,64,128,1,fp8,fp8,0,0.1684064030647278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,1,128,1,float16,fp8,0,0.11047519445419311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,1,128,1,fp8,fp8,0,0.10889439582824707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,2,128,1,float16,float16,0,0.11307040452957154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,2,128,1,float16,fp8,0,0.10936959981918334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,2,128,1,fp8,fp8,0,0.11033600568771362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,4,128,1,float16,float16,0,0.11518399715423584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,4,128,1,float16,fp8,0,0.11038559675216675
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,8,128,1,float16,float16,0,0.1209231972694397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,8,128,1,float16,fp8,0,0.11028159856796264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,64,64,128,1,fp8,fp8,0,0.32609119415283205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,8,128,1,fp8,fp8,0,0.10980000495910644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,64,128,1,float16,float16,0,0.10053280591964722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,64,128,1,float16,fp8,0,0.09038879871368408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,64,128,1,fp8,fp8,0,0.09049599766731262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,1,128,1,float16,float16,0,0.061806398630142215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,1,128,1,float16,fp8,0,0.059652799367904664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,1,128,1,fp8,fp8,0,0.05965920090675354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,2,128,1,float16,float16,0,0.06255040168762208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,2,128,1,float16,fp8,0,0.05966079831123352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,2,128,1,fp8,fp8,0,0.05977759957313537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,4,128,1,float16,fp8,0,0.05964319705963135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,4,128,1,fp8,fp8,0,0.05963360071182251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,8,128,1,float16,float16,0,0.06628000140190124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,8,128,1,float16,fp8,0,0.059862399101257326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,8,128,1,fp8,fp8,0,0.05975199937820434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,1,128,1,float16,float16,0,0.11281440258026124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,64,128,1,float16,fp8,0,0.04930880069732666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,64,128,1,fp8,fp8,0,0.04929920136928558
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,1,128,1,float16,float16,0,0.034969601035118106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,1,128,1,float16,fp8,0,0.0330159991979599
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,1,128,1,fp8,fp8,0,0.0329584002494812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,2,128,1,float16,float16,0,0.0349151998758316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,64,4,128,1,fp8,fp8,0,0.10890400409698486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,2,128,1,float16,fp8,0,0.033032000064849854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,2,128,1,fp8,fp8,0,0.032995200157165526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,4,128,1,float16,float16,0,0.035062399506568906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,4,128,1,float16,fp8,0,0.032979199290275575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,4,128,1,fp8,fp8,0,0.03299840092658997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,8,128,1,float16,float16,0,0.03700479865074158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,8,128,1,float16,fp8,0,0.03297599852085113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,8,128,1,fp8,fp8,0,0.033036801218986514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,64,128,1,float16,fp8,0,0.02890399992465973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,64,128,1,fp8,fp8,0,0.028911998867988585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,1,128,1,float16,float16,0,0.02274080067873001
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,1,128,1,float16,fp8,0,0.021209600567817687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,64,4,128,1,float16,float16,0,0.06379039883613587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,1,128,1,fp8,fp8,0,0.021110400557518005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,2,128,1,float16,float16,0,0.022788800299167633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,2,128,1,float16,fp8,0,0.022430400550365447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,2,128,1,fp8,fp8,0,0.022708800435066224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,4,128,1,float16,float16,0,0.022702400386333466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,4,128,1,float16,fp8,0,0.022675199806690215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,4,128,1,fp8,fp8,0,0.021673600375652313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,8,128,1,float16,float16,0,0.023580799996852874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,8,128,1,float16,fp8,0,0.0227183997631073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,8,128,1,fp8,fp8,0,0.022675199806690215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,64,128,1,float16,float16,0,0.01865600049495697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,64,128,1,float16,fp8,0,0.01870879977941513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,64,128,1,fp8,fp8,0,0.018668800592422485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,1,128,1,float16,float16,0,0.01465120017528534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,1,128,1,float16,fp8,0,0.014620800316333771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,1,128,1,fp8,fp8,0,0.014721600711345673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,2,128,1,float16,float16,0,0.014539200067520141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,64,64,128,1,float16,float16,0,0.03089759945869446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,2,128,1,fp8,fp8,0,0.014550399780273438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,4,128,1,float16,float16,0,0.015048000216484069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,4,128,1,float16,fp8,0,0.014580799639225006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,4,128,1,fp8,fp8,0,0.01451520025730133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,8,128,1,float16,float16,0,0.014585599303245544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,8,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,8,128,1,fp8,fp8,0,0.014502400159835815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,64,128,1,float16,float16,0,0.012518399953842163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,64,128,1,float16,fp8,0,0.012479999661445617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,64,128,1,fp8,fp8,0,0.012596799433231354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,1,128,1,float16,float16,0,0.01109279990196228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,1,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,1,128,1,fp8,fp8,0,0.010585600137710571
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,64,64,128,1,float16,float16,0,0.055345600843429564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,2,128,1,float16,float16,0,0.011020799726247787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,64,2,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,2,128,1,float16,fp8,0,0.010599999874830245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,2,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,4,128,1,float16,float16,0,0.0124719999730587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,4,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,4,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,8,128,1,float16,float16,0,0.012375999987125397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,8,128,1,float16,fp8,0,0.010931199789047242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,64,8,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,64,128,1,float16,float16,0,0.012636800110340119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,64,128,1,float16,fp8,0,0.012462399899959564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,1,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,1,128,1,float16,fp8,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,1,128,1,fp8,fp8,0,0.010590399801731109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,2,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,2,128,1,float16,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,2,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,4,128,1,float16,float16,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,4,128,1,float16,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,4,128,1,fp8,fp8,0,0.01064639985561371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,8,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,8,128,1,float16,fp8,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,8,128,1,fp8,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,1,128,1,float16,float16,0,0.3444655895233154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,1,128,1,float16,fp8,0,0.334768009185791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,1,128,1,fp8,fp8,0,0.33409440517425537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,2,128,1,float16,float16,0,0.3462896108627319
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,2,128,1,float16,fp8,0,0.33424160480499265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,2,128,1,fp8,fp8,0,0.334332799911499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,4,128,1,float16,float16,0,0.3509919881820679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,4,128,1,float16,fp8,0,0.33397760391235354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,4,128,1,fp8,fp8,0,0.33380320072174074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,8,128,1,float16,float16,0,0.3592416048049927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,8,128,1,float16,fp8,0,0.33355519771575926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,64,128,1,float16,float16,0,0.25053279399871825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,64,8,128,1,fp8,fp8,0,0.33304319381713865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,64,128,1,float16,fp8,0,0.22841279506683348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,1,128,1,float16,float16,0,0.1783408045768738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,64,128,1,fp8,fp8,0,0.22958080768585204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,64,64,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,1,128,1,float16,fp8,0,0.1723871946334839
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,1,128,1,fp8,fp8,0,0.1721984028816223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,2,128,1,float16,float16,0,0.18007199764251708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,2,128,1,float16,fp8,0,0.172326397895813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,2,128,1,fp8,fp8,0,0.1723407983779907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,4,128,1,float16,float16,0,0.1804911971092224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,4,128,1,float16,fp8,0,0.17216960191726685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,8,128,1,float16,float16,0,0.18649439811706542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,8,128,1,float16,fp8,0,0.1709280014038086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,8,128,1,fp8,fp8,0,0.17218079566955566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,64,128,1,float16,float16,0,0.12974560260772705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,64,128,1,float16,fp8,0,0.11892960071563721
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,1,128,1,float16,float16,0,0.09481120109558105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,1,128,1,float16,fp8,0,0.08991680145263672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,1,128,1,fp8,fp8,0,0.09031040072441102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,2,128,1,float16,float16,0,0.09465759992599487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,2,128,1,float16,fp8,0,0.09029920101165771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,2,128,1,fp8,fp8,0,0.08996319770812988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,4,128,1,float16,float16,0,0.09670079946517944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,4,128,1,float16,fp8,0,0.08997759819030762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,4,128,1,fp8,fp8,0,0.09033920168876648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,8,128,1,float16,float16,0,0.09879040122032165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,8,128,1,float16,fp8,0,0.0902895987033844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,8,128,1,fp8,fp8,0,0.09021279811859131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,64,128,1,float16,float16,0,0.07161759734153747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,64,128,1,float16,fp8,0,0.06516799926757813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,64,128,1,fp8,fp8,0,0.06382079720497132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,1,128,1,float16,float16,0,0.05135679841041565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,1,128,1,float16,fp8,0,0.049374398589134214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,1,128,1,fp8,fp8,0,0.049318400025367734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,2,128,1,float16,float16,0,0.05152159929275513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,2,128,1,float16,fp8,0,0.049348801374435425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,2,128,1,fp8,fp8,0,0.04941279888153076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,4,128,1,float16,float16,0,0.05140320062637329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,4,128,1,float16,fp8,0,0.049379199743270874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,4,128,1,fp8,fp8,0,0.049414399266242984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,8,128,1,float16,float16,0,0.05351679921150208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,8,128,1,float16,fp8,0,0.04930559992790222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,64,128,1,float16,float16,0,0.03701919913291931
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,64,128,1,float16,fp8,0,0.035087999701499936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,64,128,1,fp8,fp8,0,0.03507040143013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,1,128,1,float16,float16,0,0.029388800263404846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,1,128,1,float16,fp8,0,0.02884959876537323
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,1,128,1,fp8,fp8,0,0.028803199529647827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,64,64,128,1,fp8,fp8,0,0.11970399618148804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,2,128,1,float16,float16,0,0.029683199524879456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,2,128,1,float16,fp8,0,0.02877599895000458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,2,128,1,fp8,fp8,0,0.028803199529647827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,4,128,1,float16,float16,0,0.030803200602531434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,4,128,1,float16,fp8,0,0.028812798857688903
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,4,128,1,fp8,fp8,0,0.02879360020160675
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,8,128,1,float16,float16,0,0.03089759945869446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,8,128,1,float16,fp8,0,0.029281601309776306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,64,8,128,1,fp8,fp8,0,0.028774398565292358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,64,128,1,float16,float16,0,0.022710399329662324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,64,128,1,float16,fp8,0,0.02285760045051575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,64,128,1,fp8,fp8,0,0.022711999714374542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,1,128,1,float16,float16,0,0.0205375999212265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,1,128,1,float16,fp8,0,0.018639999628067016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,64,8,128,1,fp8,fp8,0,0.049332800507545474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,1,128,1,fp8,fp8,0,0.018780800700187682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,2,128,1,float16,float16,0,0.020718400180339814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,2,128,1,float16,fp8,0,0.018646399676799773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,2,128,1,fp8,fp8,0,0.01878879964351654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,4,128,1,float16,float16,0,0.020604799687862396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,4,128,1,float16,fp8,0,0.018780800700187682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,64,4,128,1,fp8,fp8,0,0.1722208023071289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,8,128,1,float16,float16,0,0.020740799605846405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,8,128,1,float16,fp8,0,0.018606400489807128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,8,128,1,fp8,fp8,0,0.0187376007437706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,64,128,1,float16,float16,0,0.016547200083732606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,64,128,1,float16,fp8,0,0.014689600467681885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,64,128,1,fp8,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,1,128,1,float16,float16,0,0.01448799967765808
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,1,128,1,float16,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,1,128,1,fp8,fp8,0,0.014207999408245086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,2,128,1,float16,fp8,0,0.014006400108337402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,2,128,1,fp8,fp8,0,0.014319999516010285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,4,128,1,float16,float16,0,0.014496000111103058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,4,128,1,float16,fp8,0,0.01433439999818802
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,4,128,1,fp8,fp8,0,0.013899199664592743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,8,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,8,128,1,float16,fp8,0,0.014100800454616546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,8,128,1,fp8,fp8,0,0.01255040019750595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,64,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,64,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,64,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,1,128,1,float16,float16,0,0.010590399801731109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,1,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,2,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,2,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,2,128,1,fp8,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,4,128,1,float16,float16,0,0.01061440035700798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,4,128,1,float16,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,4,128,1,fp8,fp8,0,0.010620799660682679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,64,4,128,1,fp8,fp8,0,0.018769599497318268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,8,128,1,float16,float16,0,0.010592000186443329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,8,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,64,128,1,float16,float16,0,0.01239520013332367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,64,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,64,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,1,128,1,float16,float16,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,1,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,2,128,1,float16,float16,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,2,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,2,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,4,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,4,128,1,float16,fp8,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,4,128,1,fp8,fp8,0,0.010313600301742554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,8,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,8,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,64,8,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,1,128,1,float16,float16,0,0.305348801612854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,1,128,1,fp8,fp8,0,0.010627199709415436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,1,128,1,float16,fp8,0,0.29072959423065187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,1,128,1,fp8,fp8,0,0.2903728008270264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,2,128,1,float16,float16,0,0.3053391933441162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,2,128,1,float16,fp8,0,0.2897200107574463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,64,8,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,2,128,1,fp8,fp8,0,0.2906816005706787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,4,128,1,float16,float16,0,0.3064800024032593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,4,128,1,float16,fp8,0,0.2910847902297974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,4,128,1,fp8,fp8,0,0.290451192855835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,8,128,1,float16,float16,0,0.3106895923614502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,8,128,1,float16,fp8,0,0.2907056093215942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,64,8,128,1,fp8,fp8,0,0.28953120708465574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,64,128,1,float16,float16,0,0.19209599494934082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,64,128,1,float16,fp8,0,0.17850719690322875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,64,128,1,fp8,fp8,0,0.17857279777526855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,1,128,1,float16,float16,0,0.1579632043838501
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,1,128,1,float16,fp8,0,0.14975680112838746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,1,128,1,fp8,fp8,0,0.14973280429840088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,2,128,1,float16,float16,0,0.15800960063934327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,2,128,1,float16,fp8,0,0.14976160526275634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,2,128,1,fp8,fp8,0,0.14977439641952514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,4,128,1,float16,float16,0,0.15948159694671632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,4,128,1,float16,fp8,0,0.14969439506530763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,8,128,1,float16,float16,0,0.1619488000869751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,8,128,1,float16,fp8,0,0.14973920583724976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,8,128,1,fp8,fp8,0,0.14973440170288085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,64,128,1,float16,float16,0,0.10124479532241822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,64,128,1,float16,fp8,0,0.09441599845886231
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,64,128,1,fp8,fp8,0,0.09446240067481995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,64,2,128,1,float16,float16,0,0.014475199580192565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,1,128,1,float16,fp8,0,0.0780239999294281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,1,128,1,fp8,fp8,0,0.07802079916000366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,2,128,1,float16,float16,0,0.08425599932670594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,2,128,1,float16,fp8,0,0.07798879742622375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,2,128,1,fp8,fp8,0,0.07799839973449707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,4,128,1,float16,float16,0,0.08422080278396607
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,4,128,1,float16,fp8,0,0.07796159982681275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,64,4,128,1,fp8,fp8,0,0.14971200227737427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,4,128,1,fp8,fp8,0,0.07804160118103028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,8,128,1,float16,float16,0,0.08628640174865723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,8,128,1,float16,fp8,0,0.07810720205307006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,8,128,1,fp8,fp8,0,0.0781503975391388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,64,128,1,float16,float16,0,0.05392640233039856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,64,128,1,float16,fp8,0,0.05127360224723816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,64,128,1,fp8,fp8,0,0.0514303982257843
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,64,1,128,1,float16,float16,0,0.08224160075187684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,1,128,1,float16,float16,0,0.04539200067520142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,1,128,1,float16,fp8,0,0.04320319890975952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,1,128,1,fp8,fp8,0,0.04317919909954071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,2,128,1,float16,float16,0,0.045363199710845944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,2,128,1,float16,fp8,0,0.043268799781799316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,2,128,1,fp8,fp8,0,0.043208000063896176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,4,128,1,float16,float16,0,0.047275200486183167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,4,128,1,float16,fp8,0,0.04326559901237488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,4,128,1,fp8,fp8,0,0.043244799971580504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,8,128,1,float16,float16,0,0.04739679992198944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,8,128,1,float16,fp8,0,0.04316959977149963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,64,8,128,1,fp8,fp8,0,0.04318720102310181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,64,128,1,float16,fp8,0,0.028940799832344054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,64,128,1,fp8,fp8,0,0.02887359857559204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,1,128,1,float16,float16,0,0.02678399980068207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,1,128,1,float16,fp8,0,0.02498079985380173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,1,128,1,fp8,fp8,0,0.024774399399757386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,2,128,1,float16,float16,0,0.026796799898147584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,2,128,1,float16,fp8,0,0.025316798686981203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,2,128,1,fp8,fp8,0,0.02678079903125763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,4,128,1,float16,float16,0,0.026788800954818726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,4,128,1,float16,fp8,0,0.026819199323654175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,4,128,1,fp8,fp8,0,0.024899199604988098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,8,128,1,float16,float16,0,0.02707839906215668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,8,128,1,float16,fp8,0,0.024868799746036528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,8,128,1,fp8,fp8,0,0.02643359899520874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,64,128,1,float16,fp8,0,0.01921280026435852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,64,128,1,fp8,fp8,0,0.01863040030002594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,1,128,1,float16,float16,0,0.018688000738620758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,1,128,1,float16,fp8,0,0.016595199704170227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,1,128,1,fp8,fp8,0,0.016646400094032288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,2,128,1,float16,float16,0,0.018612800538539885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,2,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,2,128,1,fp8,fp8,0,0.016680000722408293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,4,128,1,float16,float16,0,0.018755200505256652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,4,128,1,float16,fp8,0,0.017481599748134614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,4,128,1,fp8,fp8,0,0.01860480010509491
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,8,128,1,float16,float16,0,0.01866080015897751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,8,128,1,float16,fp8,0,0.018593600392341612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,8,128,1,fp8,fp8,0,0.017080000042915343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,64,128,1,float16,float16,0,0.014548799395561219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,64,128,1,float16,fp8,0,0.01446239948272705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,64,128,1,fp8,fp8,0,0.014420799911022186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,1,128,1,float16,float16,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,1,128,1,float16,fp8,0,0.012479999661445617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,1,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,2,128,1,float16,float16,0,0.013302400708198547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,2,128,1,float16,fp8,0,0.012462399899959564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,2,128,1,fp8,fp8,0,0.012444800138473511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,4,128,1,float16,float16,0,0.012563200294971466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,4,128,1,float16,fp8,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,64,64,128,1,float16,float16,0,0.030806401371955873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,4,128,1,fp8,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,8,128,1,float16,fp8,0,0.012598399817943574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,8,128,1,fp8,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,64,128,1,float16,float16,0,0.013060800731182098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,64,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,64,128,1,fp8,fp8,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,1,128,1,float16,float16,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,1,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,2,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,2,128,1,float16,fp8,0,0.010302399843931198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,2,128,1,fp8,fp8,0,0.010340800136327743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,64,64,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,4,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,4,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,4,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,8,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,8,128,1,float16,fp8,0,0.010527999699115753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,8,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,64,128,1,float16,float16,0,0.012403199821710587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,64,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,64,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,1,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,1,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,1,128,1,fp8,fp8,0,0.00843520015478134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,2,128,1,float16,float16,0,0.011374399811029435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,2,128,1,float16,fp8,0,0.008369600027799606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,2,128,1,fp8,fp8,0,0.010283199697732925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,4,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,4,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,4,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,8,128,1,float16,float16,0,0.010307200253009796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,8,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,64,8,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,1,128,1,float16,float16,0,0.29511840343475343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,1,128,1,float16,fp8,0,0.2762320041656494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,64,8,128,1,float16,float16,0,0.012583999335765839
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,1,128,1,fp8,fp8,0,0.2759887933731079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,2,128,1,float16,float16,0,0.2936000108718872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,64,1,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,2,128,1,fp8,fp8,0,0.2766832113265991
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,4,128,1,float16,float16,0,0.29321119785308836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,4,128,1,float16,fp8,0,0.2765952110290527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,4,128,1,fp8,fp8,0,0.2767535924911499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,8,128,1,float16,float16,0,0.2967216014862061
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,8,128,1,float16,fp8,0,0.2756880044937134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,64,128,1,float16,float16,0,0.1678928017616272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,8,128,1,fp8,fp8,0,0.2764928102493286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,64,128,1,float16,fp8,0,0.15782400369644164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,64,128,1,fp8,fp8,0,0.15812159776687623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,1,128,1,float16,float16,0,0.15172640085220337
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,1,128,1,float16,fp8,0,0.1417359948158264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,1,128,1,fp8,fp8,0,0.14157919883728026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,2,128,1,float16,float16,0,0.1517840027809143
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,2,128,1,float16,fp8,0,0.14148319959640504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,2,128,1,fp8,fp8,0,0.1415552020072937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,4,128,1,float16,float16,0,0.1508080005645752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,4,128,1,float16,fp8,0,0.1416991949081421
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,4,128,1,fp8,fp8,0,0.14151519536972046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,8,128,1,float16,float16,0,0.15400160551071168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,8,128,1,float16,fp8,0,0.14151040315628052
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,64,2,128,1,float16,fp8,0,0.27659039497375487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,64,8,128,1,fp8,fp8,0,0.14156960248947142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,64,128,1,float16,float16,0,0.08859360218048096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,64,128,1,float16,fp8,0,0.08208640217781067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,64,128,1,fp8,fp8,0,0.08218079805374146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,1,128,1,float16,float16,0,0.08011199831962586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,1,128,1,float16,fp8,0,0.07452800273895263
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,1,128,1,fp8,fp8,0,0.07597439885139465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,2,128,1,float16,float16,0,0.08002880215644836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,2,128,1,float16,fp8,0,0.07514719963073731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,2,128,1,fp8,fp8,0,0.07604640126228332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,4,128,1,float16,float16,0,0.08015199899673461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,4,128,1,float16,fp8,0,0.07451679706573486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,4,128,1,fp8,fp8,0,0.07592960000038147
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,8,128,1,float16,fp8,0,0.07501599788665772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,8,128,1,fp8,fp8,0,0.07596960067749023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,64,128,1,float16,float16,0,0.047249600291252136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,64,128,1,float16,fp8,0,0.045342400670051575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,64,128,1,fp8,fp8,0,0.04531840085983276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,1,128,1,float16,float16,0,0.045388799905776975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,1,128,1,float16,fp8,0,0.041875201463699344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,1,128,1,fp8,fp8,0,0.04323680102825165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,2,128,1,float16,float16,0,0.04534560143947601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,2,128,1,float16,fp8,0,0.041736000776290895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,2,128,1,fp8,fp8,0,0.04317600131034851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,4,128,1,float16,float16,0,0.04519200026988983
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,4,128,1,float16,fp8,0,0.042233601212501526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,4,128,1,fp8,fp8,0,0.0424591988325119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,8,128,1,float16,fp8,0,0.04142720103263855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,8,128,1,fp8,fp8,0,0.04262399971485138
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,64,128,1,float16,float16,0,0.028891199827194215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,64,128,1,fp8,fp8,0,0.02693760097026825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,1,128,1,float16,float16,0,0.02696479856967926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,1,128,1,float16,fp8,0,0.02494560033082962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,1,128,1,fp8,fp8,0,0.024849599599838255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,2,128,1,float16,float16,0,0.026927998661994933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,2,128,1,float16,fp8,0,0.024929599463939668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,2,128,1,fp8,fp8,0,0.0250575989484787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,4,128,1,float16,float16,0,0.02696479856967926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,4,128,1,float16,fp8,0,0.02492000013589859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,4,128,1,fp8,fp8,0,0.024846400320529937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,8,128,1,float16,float16,0,0.026873600482940675
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,8,128,1,float16,fp8,0,0.02489600032567978
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,8,128,1,fp8,fp8,0,0.024937599897384644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,64,8,128,1,float16,float16,0,0.08006719946861267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,64,128,1,float16,float16,0,0.020598399639129638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,64,128,1,float16,fp8,0,0.018641600012779237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,64,128,1,fp8,fp8,0,0.018639999628067016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,1,128,1,float16,float16,0,0.018639999628067016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,1,128,1,float16,fp8,0,0.017529599368572235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,1,128,1,fp8,fp8,0,0.016676799952983858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,2,128,1,float16,float16,0,0.018667200207710268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,2,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,2,128,1,fp8,fp8,0,0.017735999822616578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,4,128,1,float16,float16,0,0.018596799671649934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,4,128,1,float16,fp8,0,0.018139199912548067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,4,128,1,fp8,fp8,0,0.016667200624942778
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,8,128,1,float16,float16,0,0.018695999681949616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,8,128,1,float16,fp8,0,0.01658719927072525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,64,8,128,1,fp8,fp8,0,0.01669120043516159
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,64,128,1,float16,float16,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,64,128,1,float16,fp8,0,0.012545600533485413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,64,128,1,fp8,fp8,0,0.012452799826860428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,1,128,1,float16,float16,0,0.012678399682044983
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,64,64,128,1,float16,fp8,0,0.026979199051856993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,1,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,2,128,1,float16,float16,0,0.012587200105190276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,1,128,1,fp8,fp8,0,0.012644800543785095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,2,128,1,float16,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,2,128,1,fp8,fp8,0,0.012601600587368011
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,4,128,1,float16,float16,0,0.012574400007724761
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,4,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,4,128,1,fp8,fp8,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,8,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,8,128,1,float16,fp8,0,0.012582400441169738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,64,8,128,1,fp8,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,64,128,1,float16,float16,0,0.012377600371837615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,64,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,64,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,1,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,1,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,1,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,2,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,2,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,4,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,4,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,4,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,8,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,8,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,8,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,64,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,64,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,64,128,1,fp8,fp8,0,0.009831999987363815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,1,128,1,float16,float16,0,0.010014399886131287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,1,128,1,float16,fp8,0,0.008427199721336365
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,1,128,1,fp8,fp8,0,0.008382400125265121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,64,8,128,1,float16,float16,0,0.04530239999294281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,2,128,1,float16,float16,0,0.009809599816799163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,2,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,2,128,1,fp8,fp8,0,0.008448000252246856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,4,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,4,128,1,float16,fp8,0,0.008377599716186523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,4,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,8,128,1,float16,float16,0,0.010329599678516387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,8,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,64,8,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,1,128,1,float16,float16,0,0.29233760833740235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,1,128,1,float16,fp8,0,0.2689824104309082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,1,128,1,fp8,fp8,0,0.2685744047164917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,2,128,1,float16,float16,0,0.28909759521484374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,64,2,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,2,128,1,float16,fp8,0,0.26867361068725587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,2,128,1,fp8,fp8,0,0.2690831899642944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,0,0.29113121032714845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,0,0.2685744047164917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,4,128,1,fp8,fp8,0,0.2688704013824463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,0,0.2907183885574341
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,0,0.2685647964477539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,64,8,128,1,fp8,fp8,0,0.2694096088409424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,0,0.1517791986465454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,0,0.13953280448913574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,64,128,1,fp8,fp8,0,0.13969600200653076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,1,128,1,float16,float16,0,0.15026079416275023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,1,128,1,float16,fp8,0,0.1394976019859314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,1,128,1,fp8,fp8,0,0.1395840048789978
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,2,128,1,float16,float16,0,0.15004160404205322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,2,128,1,fp8,fp8,0,0.13942400217056275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,0,0.15007840394973754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,0,0.13950079679489136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,4,128,1,fp8,fp8,0,0.13942559957504272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,0,0.1394991993904114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,0,0.14996800422668458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,8,128,1,fp8,fp8,0,0.13939520120620727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,0,0.08195520043373108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,0,0.07434239983558655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,64,128,1,fp8,fp8,0,0.0739184021949768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,1,128,1,float16,float16,0,0.07999680042266846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,1,128,1,fp8,fp8,0,0.07389600276947021
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,2,128,1,float16,float16,0,0.0800927996635437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,2,128,1,float16,fp8,0,0.07398880124092103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,2,128,1,fp8,fp8,0,0.07399200201034546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,0,0.0800544023513794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,0,0.07405279874801636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,4,128,1,fp8,fp8,0,0.07395679950714111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,0,0.08005920052528381
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,0,0.07401279807090759
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,0,0.04529280066490173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,0,0.041152000427246094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,64,128,1,fp8,fp8,0,0.04123679995536804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,1,128,1,float16,float16,0,0.043196800351142886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,1,128,1,float16,fp8,0,0.04115679860115051
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,1,128,1,fp8,fp8,0,0.04116159975528717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,2,128,1,float16,float16,0,0.043188801407814024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,2,128,1,float16,fp8,0,0.04116640090942383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,2,128,1,fp8,fp8,0,0.041176000237464906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,0,0.04461759924888611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,64,2,128,1,float16,fp8,0,0.1395840048789978
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,0,0.04122720062732697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,4,128,1,fp8,fp8,0,0.04133279919624329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,0,0.044116801023483275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,0,0.04124319851398468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,64,8,128,1,fp8,fp8,0,0.04123519957065582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,0,0.027809599041938783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,0,0.0248416006565094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,64,128,1,fp8,fp8,0,0.024758400022983552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,1,128,1,float16,float16,0,0.026785600185394286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,1,128,1,float16,fp8,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,1,128,1,fp8,fp8,0,0.024779200553894043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,2,128,1,float16,fp8,0,0.024751999974250795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,1,128,1,float16,fp8,0,0.0742143988609314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,2,128,1,fp8,fp8,0,0.024742400646209715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,0,0.026907199621200563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,0,0.024855999648571013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,4,128,1,fp8,fp8,0,0.0247856006026268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,0,0.02699199914932251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,0,0.024822400510311128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,64,8,128,1,fp8,fp8,0,0.07416319847106934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,8,128,1,fp8,fp8,0,0.024876800179481507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,0,0.01674239933490753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,64,128,1,fp8,fp8,0,0.01669439971446991
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,1,128,1,float16,float16,0,0.018603199720382692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,1,128,1,float16,fp8,0,0.016756799817085267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,1,128,1,fp8,fp8,0,0.01674239933490753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,2,128,1,float16,float16,0,0.018624000251293182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,2,128,1,float16,fp8,0,0.016790400445461272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,2,128,1,fp8,fp8,0,0.01666879951953888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,0,0.018638400733470915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,0,0.016737599670886994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,4,128,1,fp8,fp8,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,0,0.018512000143527985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,8,128,1,fp8,fp8,0,0.01679359972476959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,64,2,128,1,float16,float16,0,0.02675839960575104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,0,0.014643199741840363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,64,128,1,fp8,fp8,0,0.012577599287033081
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,1,128,1,float16,float16,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,1,128,1,float16,fp8,0,0.012572799623012543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,1,128,1,fp8,fp8,0,0.012459199875593185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,2,128,1,float16,float16,0,0.012827199697494508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,2,128,1,fp8,fp8,0,0.012604799866676331
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,0,0.012868799269199371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,0,0.012630400061607362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,0,0.01889919936656952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,4,128,1,fp8,fp8,0,0.012547199428081513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,0,0.01252480000257492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,0,0.013676799833774567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,8,128,1,fp8,fp8,0,0.012590399384498597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,64,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,1,128,1,float16,float16,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,1,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,1,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,2,128,1,float16,float16,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,2,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,2,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,4,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,8,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,64,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,1,128,1,float16,float16,0,0.010286399722099304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,1,128,1,float16,fp8,0,0.00936639979481697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,1,128,1,fp8,fp8,0,0.010288000106811523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,2,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,2,128,1,float16,fp8,0,0.010278400033712387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,2,128,1,fp8,fp8,0,0.008462399989366532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,64,2,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,0,0.008455999940633774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,4,128,1,fp8,fp8,0,0.009200000017881394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,0,0.008430399745702744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,64,8,128,1,fp8,fp8,0,0.009470400214195252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,0,0.012563200294971466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,fp8,0,11.707138824462891
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,1,128,1,fp8,fp8,0,11.725473785400391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,float16,0,15.660963439941407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,float16,0,16.285670471191406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,fp8,0,11.771371459960937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,2,128,1,fp8,fp8,0,11.84022216796875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,fp8,0,12.112916564941406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,float16,0,16.489717102050783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,4,128,1,fp8,fp8,0,11.986382293701173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,float16,0,16.884584045410158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,fp8,0,12.085456085205077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,48,8,128,1,fp8,fp8,0,11.758998107910156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,fp8,0,6.228145599365234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,48,128,1,fp8,fp8,0,6.313647842407226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,float16,0,7.93369140625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,fp8,0,5.889503860473633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,1,128,1,fp8,fp8,0,5.954779052734375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,float16,0,8.242617797851562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,fp8,0,5.928327941894532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,2,128,1,fp8,fp8,0,5.972723388671875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,float16,0,8.183001708984374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,fp8,0,5.971728134155273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,4,128,1,fp8,fp8,0,5.983729553222656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,fp8,0,6.044321441650391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,8,128,1,fp8,fp8,0,6.161155319213867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,float16,0,7.909454345703125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,fp8,0,3.2721248626708985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,48,128,1,fp8,fp8,0,3.3886302947998046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,fp8,0,2.9793136596679686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,float16,0,4.069400024414063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,1,128,1,fp8,fp8,0,2.9817760467529295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,float16,0,4.2597297668457035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,fp8,0,3.0033391952514648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,2,128,1,fp8,fp8,0,2.9705184936523437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,float16,0,3.5997615814208985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,fp8,0,2.983118438720703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,4,128,1,fp8,fp8,0,3.169948768615723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,float16,0,3.9328048706054686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,fp8,0,3.0023216247558593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,8,128,1,fp8,fp8,0,3.0833215713500977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,fp8,0,1.6599327087402345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,48,128,1,fp8,fp8,0,1.6117567062377929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,float16,0,1.893436813354492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,fp8,0,1.7292512893676757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,1,128,1,fp8,fp8,0,1.5686623573303222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,float16,0,1.8848079681396483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,fp8,0,1.829033660888672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,2,128,1,fp8,fp8,0,1.5284144401550293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,float16,0,1.8891904830932618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,fp8,0,1.5499615669250488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,4,128,1,fp8,fp8,0,1.5274880409240723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,float16,0,1.9413791656494142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,fp8,0,1.6583744049072267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,8,128,1,fp8,fp8,0,1.5351280212402343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,float16,0,9.318183898925781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,fp8,0,6.987866973876953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,1,128,1,fp8,fp8,0,7.000745391845703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,float16,0,9.565010833740235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,fp8,0,6.968705749511718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,2,128,1,fp8,fp8,0,7.008843231201172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,float16,0,9.464295959472656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,fp8,0,6.98846206665039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,4,128,1,fp8,fp8,0,7.005379486083984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,float16,0,9.663371276855468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,fp8,0,7.127942657470703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,float16,0,1.9576927185058595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,float16,0,3.987044906616211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,float16,0,8.026790618896484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,float16,0,4.743537521362304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,48,8,128,1,fp8,fp8,0,7.071041870117187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,fp8,0,3.9003326416015627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,48,128,1,fp8,fp8,0,3.8031936645507813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,float16,0,4.333721542358399
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,1,128,1,fp8,fp8,0,3.4090560913085937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,fp8,0,3.5046287536621095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,float16,0,4.183414459228516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,fp8,0,3.7796112060546876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,2,128,1,fp8,fp8,0,3.411305618286133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,4,128,1,fp8,fp8,0,3.4243759155273437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,fp8,0,3.520555114746094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,float16,0,4.472585678100586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,float16,0,4.439075088500976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,float16,0,2.5430320739746093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,fp8,0,3.4278926849365234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,48,8,128,1,fp8,fp8,0,3.429324722290039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,fp8,0,2.072431945800781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,fp8,0,1.760798454284668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,48,128,1,fp8,fp8,0,2.145275115966797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,float16,0,2.0694175720214845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,1,128,1,fp8,fp8,0,1.7371088027954102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,fp8,0,1.74542236328125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,float16,0,2.065999984741211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,2,128,1,fp8,fp8,0,2.0374399185180665
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,fp8,0,1.7385791778564452
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,float16,0,2.1426944732666016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,4,128,1,fp8,fp8,0,1.8205696105957032
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,float16,0,2.1291391372680666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,float16,0,1.1878080368041992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,fp8,0,1.7907648086547852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,fp8,0,1.1305983543395997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,48,8,128,1,fp8,fp8,0,1.9618751525878906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,48,128,1,fp8,fp8,0,1.0155792236328125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,fp8,0,0.9541855812072754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,1,128,1,fp8,fp8,0,0.9297103881835938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,float16,0,1.0707823753356933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,fp8,0,0.9016639709472656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,2,128,1,fp8,fp8,0,0.9007552146911622
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,fp8,0,0.9006640434265136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,float16,0,1.1107775688171386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,4,128,1,fp8,fp8,0,0.9010128021240235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,fp8,0,0.9974016189575196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,float16,0,1.0880175590515138
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,8,128,1,fp8,fp8,0,0.9021120071411133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,float16,0,1.1028207778930663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,fp8,0,4.879587173461914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,1,128,1,fp8,fp8,0,4.895185470581055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,float16,0,6.430203247070312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,float16,0,6.324574279785156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,fp8,0,4.851504135131836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,2,128,1,fp8,fp8,0,4.9157249450683596
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,fp8,0,4.915083312988282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,float16,0,6.427671813964844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,4,128,1,fp8,fp8,0,4.842601776123047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,float16,0,6.594916534423828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,fp8,0,4.874203109741211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,48,8,128,1,fp8,fp8,0,4.928796768188477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,float16,0,3.3727855682373047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,fp8,0,2.916761589050293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,48,128,1,fp8,fp8,0,2.73876953125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,fp8,0,2.567452812194824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,float16,0,2.9730735778808595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,1,128,1,fp8,fp8,0,2.4788896560668947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,float16,0,2.9995296478271483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,fp8,0,2.4534975051879884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,2,128,1,fp8,fp8,0,2.9846752166748045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,fp8,0,2.4545616149902343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,float16,0,3.4069408416748046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,4,128,1,fp8,fp8,0,2.440363121032715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,float16,0,3.0740640640258787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,8,128,1,fp8,fp8,0,2.4376272201538085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,fp8,0,2.8478559494018554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,float16,0,1.687723159790039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,fp8,0,1.7264064788818358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,float16,0,1.521713638305664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,fp8,0,1.2950192451477052
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,1,128,1,fp8,fp8,0,1.3568592071533203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,float16,0,1.668961524963379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,fp8,0,1.2494223594665528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,2,128,1,fp8,fp8,0,1.249244785308838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,48,128,1,fp8,fp8,0,1.3688176155090332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,float16,0,1.6334064483642579
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,fp8,0,1.247532844543457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,4,128,1,fp8,fp8,0,1.246451187133789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,float16,0,1.499460792541504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,float16,0,0.8791567802429199
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,fp8,0,1.498367977142334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,fp8,0,0.7144847869873047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,48,128,1,fp8,fp8,0,0.7220511913299561
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,float16,0,0.7905136108398437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,fp8,0,0.6548768043518066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,1,128,1,fp8,fp8,0,0.6983248233795166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,float16,0,0.8180975914001465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,fp8,0,0.653214406967163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,2,128,1,fp8,fp8,0,0.6966400146484375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,float16,0,0.8220159530639648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,fp8,0,0.6533120155334473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,4,128,1,fp8,fp8,0,0.6693696022033692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,float16,0,0.8342592239379882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,fp8,0,0.6540719985961914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,48,8,128,1,fp8,fp8,0,0.6570879936218261
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,48,8,128,1,fp8,fp8,0,1.2496864318847656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,fp8,0,6.40032958984375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,1,128,1,fp8,fp8,0,6.39850082397461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,float16,0,8.587750244140626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,float16,0,8.255806732177735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,fp8,0,6.440402984619141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,2,128,1,fp8,fp8,0,6.4238739013671875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,float16,0,8.78091049194336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,fp8,0,6.463680267333984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,4,128,1,fp8,fp8,0,6.444377899169922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,float16,0,8.769070434570313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,fp8,0,6.656638336181641
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,float16,0,4.635331344604492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,48,8,128,1,fp8,fp8,0,6.437889862060547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,fp8,0,3.6419441223144533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,48,128,1,fp8,fp8,0,3.639072036743164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,fp8,0,3.2629520416259767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,float16,0,4.3092704772949215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,1,128,1,fp8,fp8,0,3.2498992919921874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,fp8,0,3.282939147949219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,float16,0,4.083919906616211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,2,128,1,fp8,fp8,0,3.2857776641845704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,float16,0,4.074246215820312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,fp8,0,3.2384063720703127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,4,128,1,fp8,fp8,0,3.277742385864258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,float16,0,4.150372695922852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,fp8,0,3.5446128845214844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,float16,0,2.3039936065673827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,fp8,0,2.022700881958008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,48,8,128,1,fp8,fp8,0,3.280678558349609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,48,128,1,fp8,fp8,0,1.8281936645507812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,float16,0,2.0277664184570314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,fp8,0,1.632236862182617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,1,128,1,fp8,fp8,0,1.6806175231933593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,float16,0,1.925916862487793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,2,128,1,fp8,fp8,0,1.628446388244629
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,fp8,0,1.9770816802978515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,float16,0,2.004091262817383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,fp8,0,1.847817611694336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,4,128,1,fp8,fp8,0,1.6303152084350585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,fp8,0,1.7761951446533204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,float16,0,2.0247983932495117
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,48,8,128,1,fp8,fp8,0,1.6632095336914063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,float16,0,1.1539440155029297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,fp8,0,0.9606255531311035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,float16,0,0.983456039428711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,48,128,1,fp8,fp8,0,1.1438336372375488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,fp8,0,0.843388843536377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,1,128,1,fp8,fp8,0,0.8479999542236328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,fp8,0,0.8617216110229492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,float16,0,0.9939056396484375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,2,128,1,fp8,fp8,0,0.8400336265563965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,float16,0,1.0015536308288575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,fp8,0,0.8464544296264649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,4,128,1,fp8,fp8,0,0.9156064033508301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,float16,0,1.0083583831787108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,float16,0,0.6140463829040528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,8,128,1,fp8,fp8,0,0.8532367706298828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,fp8,0,0.4945968151092529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,48,128,1,fp8,fp8,0,0.5064432144165039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,float16,0,0.5312943935394288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,1,128,1,fp8,fp8,0,0.4551055908203125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,float16,0,0.5323904037475586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,fp8,0,0.4562992095947266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,2,128,1,fp8,fp8,0,0.46430559158325196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,float16,0,0.5356400012969971
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,fp8,0,0.4473440170288086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,fp8,0,0.9275839805603028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,4,128,1,fp8,fp8,0,0.4494175910949707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,fp8,0,0.4450223922729492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,float16,0,0.5294623851776123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,fp8,0,0.4472959995269775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,48,8,128,1,fp8,fp8,0,0.4464879989624023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,fp8,0,3.8515998840332033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,1,128,1,fp8,fp8,0,3.8107040405273436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,float16,0,4.727979278564453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,float16,0,4.605630493164062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,fp8,0,3.871937561035156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,2,128,1,fp8,fp8,0,3.8099632263183594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,float16,0,4.724017715454101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,fp8,0,3.818088150024414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,4,128,1,fp8,fp8,0,3.8570911407470705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,float16,0,4.710124969482422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,fp8,0,3.8941104888916014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,float16,0,2.7823711395263673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,fp8,0,2.360268783569336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,48,128,1,fp8,fp8,0,2.2151071548461916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,48,8,128,1,fp8,fp8,0,3.8257217407226562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,float16,0,2.3925760269165037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,fp8,0,1.929840087890625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,1,128,1,fp8,fp8,0,1.9248096466064453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,float16,0,2.2342927932739256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,2,128,1,fp8,fp8,0,1.9230607986450194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,fp8,0,2.182512092590332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,float16,0,2.3201295852661135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,fp8,0,2.1706287384033205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,4,128,1,fp8,fp8,0,1.9267744064331054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,float16,0,2.359984016418457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,fp8,0,2.181227111816406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,48,8,128,1,fp8,fp8,0,1.93099365234375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,float16,0,1.381377601623535
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,fp8,0,1.3779744148254394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,48,128,1,fp8,fp8,0,1.1257935523986817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,float16,0,1.1330464363098145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,fp8,0,1.0246479988098145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,1,128,1,fp8,fp8,0,0.9982368469238281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,fp8,0,1.0362239837646485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,2,128,1,fp8,fp8,0,1.0257136344909668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,float16,0,1.1779984474182128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,fp8,0,0.9934767723083496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,4,128,1,fp8,fp8,0,0.986292839050293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,fp8,0,0.9864336013793945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,float16,0,1.212822437286377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,8,128,1,fp8,fp8,0,0.9875167846679688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,float16,0,0.7156256198883056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,fp8,0,0.6335824012756348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,float16,0,1.1418560028076172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,fp8,0,0.5516848087310791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,float16,0,0.5892543792724609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,1,128,1,fp8,fp8,0,0.512446403503418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,fp8,0,0.5133135795593262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,float16,0,0.6026688098907471
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,2,128,1,fp8,fp8,0,0.5123824119567871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,fp8,0,0.5166304111480713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,float16,0,0.5966527938842774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,4,128,1,fp8,fp8,0,0.5129119873046875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,fp8,0,0.5189216136932373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,8,128,1,fp8,fp8,0,0.514305591583252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,float16,0,0.3815727949142456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,fp8,0,0.31554241180419923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,48,128,1,fp8,fp8,0,0.3150608062744141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,48,128,1,fp8,fp8,0,0.5842480182647705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,float16,0,0.3164479970932007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,fp8,0,0.27570400238037107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,1,128,1,fp8,fp8,0,0.27641279697418214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,float16,0,0.3170880079269409
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,fp8,0,0.2767535924911499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,2,128,1,fp8,fp8,0,0.2766592025756836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,float16,0,0.3198352098464966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,fp8,0,0.27720160484313966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,4,128,1,fp8,fp8,0,0.27681760787963866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,float16,0,0.3246432065963745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,8,128,1,fp8,fp8,0,0.2777008056640625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,fp8,0,0.2774064064025879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,float16,0,0.6237264156341553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,fp8,0,3.7541423797607423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,1,128,1,fp8,fp8,0,3.75549430847168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,float16,0,4.408582305908203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,float16,0,4.571620941162109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,fp8,0,3.7560848236083983
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,2,128,1,fp8,fp8,0,3.7566272735595705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,float16,0,4.517454528808594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,fp8,0,4.002318572998047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,4,128,1,fp8,fp8,0,3.7561439514160155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,float16,0,4.653851318359375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,fp8,0,3.8397567749023436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,48,8,128,1,fp8,fp8,0,3.780955123901367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,float16,0,2.7617183685302735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,fp8,0,2.400889587402344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,48,128,1,fp8,fp8,0,2.269856071472168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,float16,0,2.1909311294555662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,fp8,0,1.9227359771728516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,1,128,1,fp8,fp8,0,1.8954191207885742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,float16,0,2.143544006347656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,fp8,0,2.0727727890014647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,2,128,1,fp8,fp8,0,1.8938159942626953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,float16,0,2.3175167083740233
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,fp8,0,1.8971311569213867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,4,128,1,fp8,fp8,0,1.8940319061279296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,fp8,0,1.9010175704956054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,float16,0,2.389841651916504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,48,8,128,1,fp8,fp8,0,1.903913688659668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,float16,0,1.4265135765075683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,fp8,0,1.273521614074707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,48,128,1,fp8,fp8,0,1.1522671699523925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,float16,0,1.0822784423828125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,1,128,1,fp8,fp8,0,0.9617584228515625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,fp8,0,0.9769071578979492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,float16,0,1.0724127769470215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,2,128,1,fp8,fp8,0,0.9679264068603516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,float16,0,1.0963664054870605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,4,128,1,fp8,fp8,0,0.963526439666748
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,float16,0,1.1628239631652832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,fp8,0,0.9671024322509766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,fp8,0,1.0058735847473144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,8,128,1,fp8,fp8,0,1.0300576210021972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,float16,0,0.722657585144043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,fp8,0,0.5947472095489502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,48,128,1,fp8,fp8,0,0.601257610321045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,float16,0,0.5629824161529541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,fp8,0,0.4978816032409668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,fp8,0,1.0343952178955078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,1,128,1,fp8,fp8,0,0.49692959785461427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,float16,0,0.5642064094543457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,fp8,0,0.4984303951263428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,2,128,1,fp8,fp8,0,0.4961552143096924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,float16,0,0.5618319988250733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,fp8,0,0.5042304039001465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,4,128,1,fp8,fp8,0,0.4989583969116211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,float16,0,0.5732240200042724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,8,128,1,fp8,fp8,0,0.49794559478759765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,float16,0,0.3806976079940796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,fp8,0,0.3135632038116455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,48,128,1,fp8,fp8,0,0.31321120262145996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,float16,0,0.2988800048828125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,fp8,0,0.2643392086029053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,1,128,1,fp8,fp8,0,0.26515679359436034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,float16,0,0.29950881004333496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,fp8,0,0.2642240047454834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,float16,0,0.302564811706543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,fp8,0,0.2645008087158203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,4,128,1,fp8,fp8,0,0.26592800617218015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,float16,0,0.30204000473022463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,fp8,0,0.26618878841400145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,8,128,1,fp8,fp8,0,0.26467199325561525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,float16,0,0.20890240669250487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,fp8,0,0.17400480508804322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,48,128,1,fp8,fp8,0,0.1743839979171753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,float16,0,0.16041120290756225
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,fp8,0,0.14756959676742554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,1,128,1,fp8,fp8,0,0.14650559425354004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,float16,0,0.16469119787216185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,fp8,0,0.14669439792633057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,2,128,1,fp8,fp8,0,0.14746559858322145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,fp8,0,0.49935359954833985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,float16,0,0.1623568058013916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,4,128,1,fp8,fp8,0,0.14731040000915527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,float16,0,0.16656160354614258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,fp8,0,0.14721280336380005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,8,128,1,fp8,fp8,0,0.1467695951461792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,48,2,128,1,fp8,fp8,0,0.2645711898803711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,fp8,0,2.3223503112792967
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,float16,0,2.5756736755371095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,1,128,1,fp8,fp8,0,2.3246816635131835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,fp8,0,0.14739999771118165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,fp8,0,2.435553550720215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,2,128,1,fp8,fp8,0,2.331563186645508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,float16,0,2.7130064010620116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,fp8,0,2.325484848022461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,4,128,1,fp8,fp8,0,2.3258256912231445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,float16,0,2.6045343399047853
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,fp8,0,2.3353519439697266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,8,128,1,fp8,fp8,0,2.333448028564453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,float16,0,1.7211183547973632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,fp8,0,1.4536831855773926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,48,128,1,fp8,fp8,0,1.4538656234741212
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,float16,0,1.2843759536743165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,float16,0,2.7772943496704103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,fp8,0,1.1743503570556642
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,1,128,1,fp8,fp8,0,1.1754912376403808
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,float16,0,1.3099632263183594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,fp8,0,1.177337646484375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,2,128,1,fp8,fp8,0,1.175051212310791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,float16,0,1.31626558303833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,fp8,0,1.211070442199707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,4,128,1,fp8,fp8,0,1.1800239562988282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,float16,0,1.3549087524414063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,fp8,0,1.1863887786865235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,48,8,128,1,fp8,fp8,0,1.183448028564453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,fp8,0,0.7425439834594727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,float16,0,0.6527200222015381
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,fp8,0,0.6012144088745117
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,1,128,1,fp8,fp8,0,0.6054975986480713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,float16,0,0.6547904014587402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,fp8,0,0.6065968036651611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,float16,0,0.8913151741027832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,48,128,1,fp8,fp8,0,0.7415023803710937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,float16,0,0.6807968139648437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,fp8,0,0.6016272068023681
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,4,128,1,fp8,fp8,0,0.6025263786315918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,fp8,0,0.6050127983093262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,float16,0,0.6904384136199951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,float16,0,0.45630559921264646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,8,128,1,fp8,fp8,0,0.6049856185913086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,fp8,0,0.38536159992218016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,48,128,1,fp8,fp8,0,0.3869920015335083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,48,2,128,1,fp8,fp8,0,0.6015952110290528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,float16,0,0.3432784080505371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,fp8,0,0.3142704010009766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,1,128,1,fp8,fp8,0,0.313369607925415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,float16,0,0.34429919719696045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,fp8,0,0.31500959396362305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,2,128,1,fp8,fp8,0,0.31372320652008057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,float16,0,0.34637439250946045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,fp8,0,0.3145279884338379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,4,128,1,fp8,fp8,0,0.3152944087982178
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,float16,0,0.3568639993667603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,fp8,0,0.31573760509490967
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,48,8,128,1,fp8,fp8,0,0.315011191368103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,fp8,0,0.207094407081604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,48,128,1,fp8,fp8,0,0.2071295976638794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,float16,0,0.18552000522613527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,fp8,0,0.1689743995666504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,1,128,1,fp8,fp8,0,0.1698848009109497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,float16,0,0.18594880104064943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,fp8,0,0.1693295955657959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,2,128,1,fp8,fp8,0,0.16992160081863403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,float16,0,0.18886239528656007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,fp8,0,0.1702064037322998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,4,128,1,fp8,fp8,0,0.17022240161895752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,float16,0,0.19285759925842286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,float16,0,0.13533120155334472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,fp8,0,0.11669280529022216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,48,128,1,fp8,fp8,0,0.1168511986732483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,float16,0,0.10409599542617798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,fp8,0,0.09655200242996216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,1,128,1,fp8,fp8,0,0.09651839733123779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,float16,0,0.10474560260772706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,fp8,0,0.0967199981212616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,2,128,1,fp8,fp8,0,0.09644799828529357
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,float16,0,0.10480320453643799
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,fp8,0,0.09660159945487976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,float16,0,0.24529919624328614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,4,128,1,fp8,fp8,0,0.09655839800834656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,float16,0,0.10878880023956299
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,fp8,0,0.09648799896240234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,48,8,128,1,fp8,fp8,0,0.09722399711608887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,fp8,0,0.1704576015472412
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,48,8,128,1,fp8,fp8,0,0.17055039405822753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,fp8,0,2.4247919082641602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,float16,0,2.6079904556274416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,1,128,1,fp8,fp8,0,2.418671989440918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,float16,0,2.5612207412719727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,fp8,0,2.425609588623047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,2,128,1,fp8,fp8,0,2.5604095458984375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,float16,0,2.6542863845825195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,fp8,0,2.4936256408691406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,4,128,1,fp8,fp8,0,2.4282976150512696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,float16,0,2.7793039321899413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,fp8,0,2.512966346740723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,48,8,128,1,fp8,fp8,0,2.430006408691406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,float16,0,1.873094367980957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,fp8,0,1.6163711547851562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,float16,0,1.2852767944335937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,48,128,1,fp8,fp8,0,1.5879584312438966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,fp8,0,1.2542384147644043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,1,128,1,fp8,fp8,0,1.2218463897705079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,float16,0,1.3168656349182128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,fp8,0,1.2204912185668946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,2,128,1,fp8,fp8,0,1.2243151664733887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,float16,0,1.330065631866455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,fp8,0,1.228987216949463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,4,128,1,fp8,fp8,0,1.2226016044616699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,float16,0,1.3984720230102539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,fp8,0,1.2597663879394532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,float16,0,0.934928035736084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,48,8,128,1,fp8,fp8,0,1.225926399230957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,fp8,0,0.8473263740539551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,48,128,1,fp8,fp8,0,0.8064640045166016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,float16,0,0.6538911819458008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,fp8,0,0.6210447788238526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,1,128,1,fp8,fp8,0,0.622273588180542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,float16,0,0.6625103950500488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,2,128,1,fp8,fp8,0,0.6210959911346435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,fp8,0,0.6219535827636719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,4,128,1,fp8,fp8,0,0.6216432094573975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,float16,0,0.6994624137878418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,fp8,0,0.6241104125976562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,8,128,1,fp8,fp8,0,0.6249887943267822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,float16,0,0.47974557876586915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,fp8,0,0.4146399974822998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,48,128,1,fp8,fp8,0,0.4141263961791992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,float16,0,0.3407664060592651
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,fp8,0,0.6216639995574951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,1,128,1,fp8,fp8,0,0.32062718868255613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,float16,0,0.6851727962493896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,float16,0,0.33872160911560056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,fp8,0,0.3220927953720093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,2,128,1,fp8,fp8,0,0.3216207981109619
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,float16,0,0.3519455909729004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,fp8,0,0.3216016054153442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,4,128,1,fp8,fp8,0,0.32232160568237306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,float16,0,0.3582047939300537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,fp8,0,0.32383999824523924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,8,128,1,fp8,fp8,0,0.32294559478759766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,float16,0,0.2547584056854248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,fp8,0,0.21898560523986815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,48,128,1,fp8,fp8,0,0.21943039894104005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,float16,0,0.18025280237197877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,fp8,0,0.1720639944076538
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,1,128,1,fp8,fp8,0,0.17063039541244507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,fp8,0,0.3209343910217285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,float16,0,0.1848863959312439
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,fp8,0,0.17060960531234742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,2,128,1,fp8,fp8,0,0.17221280336380004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,float16,0,0.18758080005645753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,4,128,1,fp8,fp8,0,0.1721791982650757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,float16,0,0.19406880140304567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,fp8,0,0.17217119932174682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,8,128,1,fp8,fp8,0,0.1725167989730835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,float16,0,0.1393488049507141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,fp8,0,0.12106560468673706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,48,128,1,fp8,fp8,0,0.1208191990852356
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,float16,0,0.10210720300674439
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,fp8,0,0.09446560144424439
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,float16,0,0.10239360332489014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,1,128,1,fp8,fp8,0,0.09455839991569519
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,2,128,1,fp8,fp8,0,0.0951903998851776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,float16,0,0.10283839702606201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,fp8,0,0.09455519914627075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,4,128,1,fp8,fp8,0,0.09496480226516724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,float16,0,0.10737119913101197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,fp8,0,0.09458720088005065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,8,128,1,fp8,fp8,0,0.09513440132141113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,float16,0,0.08200160264968873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,fp8,0,0.06985759735107422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,48,128,1,fp8,fp8,0,0.06984159946441651
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,fp8,0,0.05772799849510193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,1,128,1,fp8,fp8,0,0.05783039927482605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,float16,0,0.06167839765548706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,fp8,0,0.05760319828987122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,2,128,1,fp8,fp8,0,0.05767199993133545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,float16,0,0.0617904007434845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,fp8,0,0.057601600885391235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,4,128,1,fp8,fp8,0,0.05758879780769348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,float16,0,0.0636672019958496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,fp8,0,0.05758559703826904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,8,128,1,fp8,fp8,0,0.057657599449157715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,fp8,0,0.09470720291137695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,float16,0,0.062174397706985476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,float16,0,1.609649658203125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,fp8,0,1.5673824310302735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,fp8,0,0.17109919786453248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,1,128,1,fp8,fp8,0,1.5688735961914062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,float16,0,1.6453184127807616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,fp8,0,1.5679455757141114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,2,128,1,fp8,fp8,0,1.5663599967956543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,float16,0,1.685513687133789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,fp8,0,1.6648784637451173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,4,128,1,fp8,fp8,0,1.5697983741760253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,float16,0,1.7321231842041016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,fp8,0,1.5800288200378418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,float16,0,1.2245519638061524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,48,8,128,1,fp8,fp8,0,1.5733136177062987
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,fp8,0,1.0895824432373047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,float16,0,0.8205216407775879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,48,128,1,fp8,fp8,0,1.069324779510498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,1,128,1,fp8,fp8,0,0.8091440200805664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,float16,0,0.8261199951171875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,fp8,0,0.7947775840759277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,2,128,1,fp8,fp8,0,0.7929999828338623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,float16,0,0.8544528007507324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,fp8,0,0.7955984115600586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,4,128,1,fp8,fp8,0,0.7940256118774414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,float16,0,0.909449577331543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,fp8,0,0.7980256080627441
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,8,128,1,fp8,fp8,0,0.7959263801574707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,float16,0,0.6330895900726319
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,fp8,0,0.5445343971252441
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,fp8,0,0.7923808097839355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,48,128,1,fp8,fp8,0,0.5434000015258789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,float16,0,0.43069281578063967
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,1,128,1,fp8,fp8,0,0.40578560829162597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,float16,0,0.42107357978820803
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,fp8,0,0.41599202156066895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,2,128,1,fp8,fp8,0,0.4058207988739014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,float16,0,0.4303904056549072
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,fp8,0,0.4065104007720947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,4,128,1,fp8,fp8,0,0.4060832023620605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,float16,0,0.44923038482666017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,fp8,0,0.4077280044555664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,8,128,1,fp8,fp8,0,0.4086559772491455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,float16,0,0.3215728044509888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,fp8,0,0.28171041011810305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,48,128,1,fp8,fp8,0,0.28193600177764894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,float16,0,0.22144320011138915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,fp8,0,0.21195359230041505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,1,128,1,fp8,fp8,0,0.21134560108184813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,float16,0,0.2216399908065796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,fp8,0,0.21230080127716064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,2,128,1,fp8,fp8,0,0.21296160221099852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,float16,0,0.22639839649200438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,fp8,0,0.21253759860992433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,4,128,1,fp8,fp8,0,0.21328160762786866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,fp8,0,0.40677919387817385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,fp8,0,0.21352000236511232
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,8,128,1,fp8,fp8,0,0.21367359161376953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,float16,0,0.17219040393829346
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,fp8,0,0.15181599855422973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,48,128,1,fp8,fp8,0,0.15141600370407104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,float16,0,0.12222559452056884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,fp8,0,0.11502560377120971
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,1,128,1,fp8,fp8,0,0.11510560512542725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,float16,0,0.12125279903411865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,fp8,0,0.11581759452819824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,2,128,1,fp8,fp8,0,0.11494879722595215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,float16,0,0.12530879974365233
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,fp8,0,0.1148527979850769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,4,128,1,fp8,fp8,0,0.11603679656982421
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,float16,0,0.12720160484313964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,fp8,0,0.11622079610824584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,48,8,128,1,fp8,fp8,0,0.11559040546417236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,float16,0,0.0962224006652832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,fp8,0,0.08459519743919372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,48,128,1,fp8,fp8,0,0.08444160223007202
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,float16,0,0.06896479725837708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,fp8,0,0.06580479741096497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,1,128,1,fp8,fp8,0,0.06573759913444518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,float16,0,0.0700432002544403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,fp8,0,0.06589440107345582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,2,128,1,fp8,fp8,0,0.06629279851913453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,float16,0,0.06980479955673217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,fp8,0,0.06640959978103637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,float16,0,0.07360479831695557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,fp8,0,0.0660431981086731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,8,128,1,fp8,fp8,0,0.0657920002937317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,float16,0,0.2358720064163208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,fp8,0,0.049379199743270874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,float16,0,0.0553551971912384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,48,128,1,fp8,fp8,0,0.04950880110263824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,float16,0,0.04326080083847046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,fp8,0,0.041228801012039185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,1,128,1,fp8,fp8,0,0.04117920100688934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,fp8,0,0.0414000004529953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,2,128,1,fp8,fp8,0,0.04129279851913452
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,float16,0,0.045259198546409606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,fp8,0,0.041201600432395936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,4,128,1,fp8,fp8,0,0.04134880006313324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,float16,0,0.04532159864902496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,8,128,1,fp8,fp8,0,0.04124000072479248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,48,4,128,1,fp8,fp8,0,0.06580479741096497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,float16,0,1.755499267578125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,float16,0,0.04335519969463349
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,fp8,0,0.04150879979133606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,fp8,0,1.7447359085083007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,1,128,1,fp8,fp8,0,1.7812015533447265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,float16,0,1.7647743225097656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,fp8,0,1.7464559555053711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,2,128,1,fp8,fp8,0,1.7447872161865234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,float16,0,1.814031982421875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,fp8,0,1.746571159362793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,4,128,1,fp8,fp8,0,1.7486656188964844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,float16,0,1.9230623245239258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,fp8,0,1.7608303070068358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,48,8,128,1,fp8,fp8,0,1.750071907043457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,float16,0,1.404524803161621
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,fp8,0,1.2414336204528809
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,48,128,1,fp8,fp8,0,1.234990406036377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,float16,0,0.887822437286377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,fp8,0,0.8816944122314453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,1,128,1,fp8,fp8,0,0.8805312156677246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,float16,0,0.8926560401916503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,fp8,0,0.8799391746520996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,2,128,1,fp8,fp8,0,0.8823871612548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,float16,0,0.918939208984375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,fp8,0,0.8807663917541504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,4,128,1,fp8,fp8,0,0.880577564239502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,float16,0,0.9719743728637695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,fp8,0,0.8840959548950196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,48,8,128,1,fp8,fp8,0,0.8837696075439453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,float16,0,0.7083775997161865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,fp8,0,0.6290847778320312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,float16,0,0.45346717834472655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,48,128,1,fp8,fp8,0,0.6270832061767578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,1,128,1,fp8,fp8,0,0.4492527961730957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,float16,0,0.4560863971710205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,fp8,0,0.44785919189453127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,2,128,1,fp8,fp8,0,0.4487023830413818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,float16,0,0.4677840232849121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,fp8,0,0.4490816116333008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,4,128,1,fp8,fp8,0,0.4498000144958496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,float16,0,0.49289278984069823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,fp8,0,0.4506336212158203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,8,128,1,fp8,fp8,0,0.45107197761535645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,float16,0,0.3639071941375732
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,fp8,0,0.32291040420532224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,48,128,1,fp8,fp8,0,0.3221776008605957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,float16,0,0.2380511999130249
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,fp8,0,0.23161919116973878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,1,128,1,fp8,fp8,0,0.23251039981842042
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,float16,0,0.23735361099243163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,fp8,0,0.23267040252685547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,2,128,1,fp8,fp8,0,0.23190879821777344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,float16,0,0.24446239471435546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,fp8,0,0.2317471981048584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,4,128,1,fp8,fp8,0,0.23337440490722655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,float16,0,0.2549823999404907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,fp8,0,0.2338495969772339
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,48,8,128,1,fp8,fp8,0,0.23268480300903321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,float16,0,0.1924415946006775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,fp8,0,0.17020959854125978
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,float16,0,0.12744319438934326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,48,128,1,fp8,fp8,0,0.17067519426345826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,fp8,0,0.12371519804000855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,1,128,1,fp8,fp8,0,0.1240447998046875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,float16,0,0.12780319452285765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,fp8,0,0.12382080554962158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,2,128,1,fp8,fp8,0,0.12419519424438477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,fp8,0,0.12487839460372925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,4,128,1,fp8,fp8,0,0.12422399520874024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,float16,0,0.13754080533981322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,fp8,0,0.1250640034675598
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,8,128,1,fp8,fp8,0,0.12511999607086183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,fp8,0,0.4474991798400879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,float16,0,0.10558719635009765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,fp8,0,0.09267839789390564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,48,128,1,fp8,fp8,0,0.09262719750404358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,float16,0,0.0712656021118164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,fp8,0,0.0680624008178711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,1,128,1,fp8,fp8,0,0.06794720292091369
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,fp8,0,0.06808480024337768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,2,128,1,fp8,fp8,0,0.06808000206947326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,float16,0,0.07348319888114929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,fp8,0,0.06837599873542785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,4,128,1,fp8,fp8,0,0.06795520186424256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,float16,0,0.07693759799003601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,fp8,0,0.06805599927902221
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,8,128,1,fp8,fp8,0,0.06831520199775695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,fp8,0,0.05347520112991333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,48,128,1,fp8,fp8,0,0.05257279872894287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,float16,0,0.0434255987405777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,fp8,0,0.04113759994506836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,float16,0,0.13124799728393555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,float16,0,0.04451520144939423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,fp8,0,0.04191359877586365
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,2,128,1,fp8,fp8,0,0.0425024002790451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,float16,0,0.04529759883880615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,fp8,0,0.042008000612258914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,4,128,1,fp8,fp8,0,0.04256959855556488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,float16,0,0.04540959894657135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,fp8,0,0.041912001371383664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,8,128,1,fp8,fp8,0,0.04228160083293915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,float16,0,0.035036799311637876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,fp8,0,0.03511680066585541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,float16,0,0.07173759937286377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,48,128,1,fp8,fp8,0,0.03502239882946014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,float16,0,0.03102880120277405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,fp8,0,0.028966400027275085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,1,128,1,fp8,fp8,0,0.028988799452781676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,float16,0,0.031143999099731444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,fp8,0,0.02919679880142212
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,2,128,1,fp8,fp8,0,0.029734399914741517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,float16,0,0.03118079900741577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,fp8,0,0.029091200232505797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,4,128,1,fp8,fp8,0,0.028923198580741882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,float16,0,0.03296479880809784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,float16,0,0.06247519850730896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,8,128,1,fp8,fp8,0,0.03087199926376343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,48,1,128,1,fp8,fp8,0,0.04139519929885864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,1,128,1,float16,float16,0,1.3785408020019532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,1,128,1,float16,fp8,0,1.3984848022460938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,1,128,1,fp8,fp8,0,1.3961935997009278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,2,128,1,float16,float16,0,1.3851823806762695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,2,128,1,float16,fp8,0,1.3964799880981444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,fp8,0,0.028972798585891725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,2,128,1,fp8,fp8,0,1.3940544128417969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,4,128,1,float16,float16,0,1.4254256248474122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,4,128,1,float16,fp8,0,1.3973440170288085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,4,128,1,fp8,fp8,0,1.3950063705444335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,8,128,1,float16,float16,0,1.505568027496338
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,8,128,1,float16,fp8,0,1.3985535621643066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,48,8,128,1,fp8,fp8,0,1.3995183944702148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,48,128,1,float16,float16,0,1.1699952125549316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,48,128,1,float16,fp8,0,1.0456656455993651
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,1,128,1,float16,float16,0,0.6993135929107666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,1,128,1,float16,fp8,0,0.704750394821167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,48,128,1,fp8,fp8,0,1.047214412689209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,1,128,1,fp8,fp8,0,0.7050928115844727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,2,128,1,float16,float16,0,0.7016751766204834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,2,128,1,float16,fp8,0,0.7042895793914795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,2,128,1,fp8,fp8,0,0.704963207244873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,4,128,1,float16,float16,0,0.7192959785461426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,4,128,1,float16,fp8,0,0.7065567970275879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,4,128,1,fp8,fp8,0,0.7046559810638428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,8,128,1,float16,float16,0,0.7618544101715088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,8,128,1,float16,fp8,0,0.7070752143859863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,48,8,128,1,fp8,fp8,0,0.7059631824493409
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,48,128,1,float16,float16,0,0.5944064140319825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,48,128,1,float16,fp8,0,0.5322112083435059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,48,128,1,fp8,fp8,0,0.5306655883789062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,1,128,1,float16,fp8,0,0.3596911907196045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,1,128,1,fp8,fp8,0,0.3593760013580322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,2,128,1,float16,float16,0,0.35869441032409666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,2,128,1,float16,fp8,0,0.3587183952331543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,2,128,1,fp8,fp8,0,0.3598736047744751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,4,128,1,float16,float16,0,0.36705119609832765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,4,128,1,float16,fp8,0,0.3604383945465088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,4,128,1,fp8,fp8,0,0.35945279598236085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,8,128,1,float16,float16,0,0.38898720741271975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,8,128,1,float16,fp8,0,0.3599440097808838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,48,128,1,float16,float16,0,0.30450239181518557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,8,128,1,fp8,fp8,0,0.3609776020050049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,48,128,1,float16,fp8,0,0.27279200553894045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,48,128,1,fp8,fp8,0,0.27359681129455565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,48,1,128,1,float16,float16,0,0.3568399906158447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,1,128,1,float16,float16,0,0.18653440475463867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,1,128,1,float16,fp8,0,0.18673919439315795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,2,128,1,float16,float16,0,0.18667999505996705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,2,128,1,float16,fp8,0,0.18669120073318482
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,4,128,1,float16,float16,0,0.191867196559906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,4,128,1,float16,fp8,0,0.18657439947128296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,2,128,1,fp8,fp8,0,0.18677760362625123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,4,128,1,fp8,fp8,0,0.18653279542922974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,8,128,1,float16,float16,0,0.20188639163970948
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,8,128,1,float16,fp8,0,0.1867151975631714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,8,128,1,fp8,fp8,0,0.18700480461120605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,48,128,1,float16,float16,0,0.16059520244598388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,48,128,1,fp8,fp8,0,0.1439247965812683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,1,128,1,float16,float16,0,0.10071680545806885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,1,128,1,float16,fp8,0,0.10040479898452759
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,1,128,1,fp8,fp8,0,0.10029120445251465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,2,128,1,float16,float16,0,0.10075199604034424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,2,128,1,float16,fp8,0,0.10070240497589111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,2,128,1,fp8,fp8,0,0.10024960041046142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,4,128,1,float16,float16,0,0.10447039604187011
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,4,128,1,float16,fp8,0,0.10003039836883545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,4,128,1,fp8,fp8,0,0.10049279928207397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,8,128,1,float16,float16,0,0.10857599973678589
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,8,128,1,float16,fp8,0,0.1005903959274292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,8,128,1,fp8,fp8,0,0.1005295991897583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,48,128,1,float16,float16,0,0.08823999762535095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,48,128,1,float16,fp8,0,0.07824000120162963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,48,128,1,fp8,fp8,0,0.07954720258712769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,1,128,1,float16,float16,0,0.05565440058708191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,1,128,1,float16,fp8,0,0.05552319884300232
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,1,128,1,fp8,fp8,0,0.05542399883270264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,2,128,1,float16,float16,0,0.05750399827957153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,2,128,1,float16,fp8,0,0.05550240278244019
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,2,128,1,fp8,fp8,0,0.055579197406768796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,4,128,1,float16,float16,0,0.05799840092658996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,4,128,1,float16,fp8,0,0.055550402402877806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,4,128,1,fp8,fp8,0,0.055478399991989134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,8,128,1,float16,float16,0,0.06170079708099365
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,48,1,128,1,fp8,fp8,0,0.18656320571899415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,8,128,1,float16,fp8,0,0.055508798360824584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,48,8,128,1,fp8,fp8,0,0.05551519989967346
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,48,128,1,float16,float16,0,0.05135040283203125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,48,128,1,fp8,fp8,0,0.04530400037765503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,1,128,1,float16,float16,0,0.035113599896430966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,1,128,1,float16,fp8,0,0.03390240073204041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,1,128,1,fp8,fp8,0,0.03394719958305359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,2,128,1,float16,float16,0,0.035158398747444156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,2,128,1,float16,fp8,0,0.034411200881004335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,2,128,1,fp8,fp8,0,0.03404319882392883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,4,128,1,float16,float16,0,0.03562400043010712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,4,128,1,float16,fp8,0,0.03499679863452911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,4,128,1,fp8,fp8,0,0.034625598788261415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,8,128,1,float16,float16,0,0.037176001071929934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,48,48,128,1,float16,fp8,0,0.14372479915618896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,8,128,1,float16,fp8,0,0.03466239869594574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,8,128,1,fp8,fp8,0,0.03495840132236481
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,48,128,1,float16,float16,0,0.02876800000667572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,48,128,1,float16,fp8,0,0.028918400406837463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,48,128,1,fp8,fp8,0,0.028972798585891725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,1,128,1,float16,float16,0,0.02481919974088669
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,1,128,1,float16,fp8,0,0.024422399699687958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,1,128,1,fp8,fp8,0,0.02466239929199219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,2,128,1,float16,float16,0,0.02494720071554184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,2,128,1,float16,fp8,0,0.02465119957923889
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,2,128,1,fp8,fp8,0,0.02468159943819046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,4,128,1,float16,float16,0,0.02481919974088669
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,4,128,1,float16,fp8,0,0.02462079972028732
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,4,128,1,fp8,fp8,0,0.024659200012683867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,8,128,1,float16,float16,0,0.026736000180244447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,8,128,1,fp8,fp8,0,0.02468640059232712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,48,128,1,float16,float16,0,0.01873279958963394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,48,128,1,float16,fp8,0,0.01957920044660568
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,48,128,1,fp8,fp8,0,0.018779200315475465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,1,128,1,float16,fp8,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,1,128,1,fp8,fp8,0,0.016681599617004394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,2,128,1,float16,float16,0,0.016808000206947327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,2,128,1,float16,fp8,0,0.01659200042486191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,48,48,128,1,float16,fp8,0,0.04523519873619079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,2,128,1,fp8,fp8,0,0.016582399606704712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,4,128,1,float16,float16,0,0.016571199893951415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,4,128,1,float16,fp8,0,0.016590400040149687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,4,128,1,fp8,fp8,0,0.01652960032224655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,8,128,1,float16,float16,0,0.018241600692272188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,8,128,1,float16,fp8,0,0.016582399606704712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,8,128,1,fp8,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,48,8,128,1,float16,fp8,0,0.024588799476623534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,1,128,1,float16,float16,0,0.5862783908843994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,48,1,128,1,float16,float16,0,0.018036800622940063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,1,128,1,float16,fp8,0,0.6047088146209717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,1,128,1,fp8,fp8,0,0.6043439865112304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,2,128,1,float16,float16,0,0.5897295951843262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,2,128,1,float16,fp8,0,0.6036736011505127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,2,128,1,fp8,fp8,0,0.6033423900604248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,4,128,1,float16,float16,0,0.608523178100586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,4,128,1,float16,fp8,0,0.6032464027404785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,4,128,1,fp8,fp8,0,0.6031343936920166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,8,128,1,float16,float16,0,0.6462592124938965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,8,128,1,fp8,fp8,0,0.6030399799346924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,48,128,1,float16,float16,0,0.5254767894744873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,48,128,1,float16,fp8,0,0.4731935977935791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,1,128,1,float16,float16,0,0.3010080099105835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,48,128,1,fp8,fp8,0,0.47336478233337403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,1,128,1,float16,fp8,0,0.30753440856933595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,1,128,1,fp8,fp8,0,0.30844480991363527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,2,128,1,float16,float16,0,0.30100159645080565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,2,128,1,float16,fp8,0,0.307369589805603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,2,128,1,fp8,fp8,0,0.3078543901443481
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,4,128,1,float16,float16,0,0.30970239639282227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,4,128,1,float16,fp8,0,0.3075936079025269
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,4,128,1,fp8,fp8,0,0.3079279899597168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,8,128,1,float16,float16,0,0.3282000064849854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,8,128,1,float16,fp8,0,0.30752480030059814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,48,8,128,1,fp8,fp8,0,0.30792319774627686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,48,128,1,float16,float16,0,0.271399998664856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,48,128,1,float16,fp8,0,0.24408481121063233
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,48,128,1,fp8,fp8,0,0.24450080394744872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,1,128,1,float16,float16,0,0.15801600217819214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,1,128,1,float16,fp8,0,0.1614464044570923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,1,128,1,fp8,fp8,0,0.16169120073318483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,2,128,1,float16,float16,0,0.1584720015525818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,2,128,1,float16,fp8,0,0.1613935947418213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,2,128,1,fp8,fp8,0,0.16200000047683716
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,4,128,1,float16,float16,0,0.16400320529937745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,4,128,1,float16,fp8,0,0.16098079681396485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,48,8,128,1,float16,fp8,0,0.6032192230224609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,8,128,1,float16,float16,0,0.17256959676742553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,8,128,1,float16,fp8,0,0.16172159910202027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,8,128,1,fp8,fp8,0,0.1619328022003174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,48,128,1,float16,float16,0,0.14415359497070312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,48,128,1,float16,fp8,0,0.13049440383911132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,48,128,1,fp8,fp8,0,0.12936799526214598
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,1,128,1,float16,float16,0,0.08833119869232178
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,1,128,1,float16,fp8,0,0.08825920224189758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,1,128,1,fp8,fp8,0,0.0882207989692688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,2,128,1,float16,float16,0,0.08781120181083679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,2,128,1,float16,fp8,0,0.08813760280609131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,2,128,1,fp8,fp8,0,0.08819040060043334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,4,128,1,float16,float16,0,0.09035199880599976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,4,128,1,float16,fp8,0,0.08836159706115723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,4,128,1,fp8,fp8,0,0.0881488025188446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,8,128,1,float16,float16,0,0.0944495975971222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,8,128,1,float16,fp8,0,0.08826079964637756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,48,8,128,1,fp8,fp8,0,0.08818560242652893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,48,128,1,float16,float16,0,0.08035200238227844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,48,128,1,float16,fp8,0,0.0731440007686615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,48,128,1,fp8,fp8,0,0.07218080163002014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,1,128,1,float16,float16,0,0.05140640139579773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,1,128,1,float16,fp8,0,0.049374398589134214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,1,128,1,fp8,fp8,0,0.04936000108718872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,2,128,1,float16,float16,0,0.051444798707962036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,2,128,1,float16,fp8,0,0.049296000599861146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,2,128,1,fp8,fp8,0,0.04937599897384644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,4,128,1,float16,float16,0,0.05256159901618958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,4,128,1,float16,fp8,0,0.049414399266242984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,4,128,1,fp8,fp8,0,0.04948320090770721
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,8,128,1,float16,float16,0,0.05564799904823303
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,8,128,1,float16,fp8,0,0.04936800003051758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,48,8,128,1,fp8,fp8,0,0.04983679950237274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,48,128,1,float16,float16,0,0.04624319970607758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,48,128,1,float16,fp8,0,0.03920319974422455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,48,128,1,fp8,fp8,0,0.040596801042556765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,1,128,1,float16,float16,0,0.029084798693656922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,1,128,1,float16,fp8,0,0.02882719933986664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,1,128,1,fp8,fp8,0,0.028908801078796387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,2,128,1,float16,float16,0,0.02916640043258667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,2,128,1,float16,fp8,0,0.028863999247550964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,2,128,1,fp8,fp8,0,0.02890079915523529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,4,128,1,float16,float16,0,0.030953601002693176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,4,128,1,float16,fp8,0,0.028972798585891725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,4,128,1,fp8,fp8,0,0.02890239953994751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,8,128,1,float16,float16,0,0.031086400151252747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,8,128,1,float16,fp8,0,0.0288783997297287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,48,8,128,1,fp8,fp8,0,0.02887679934501648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,48,128,1,float16,float16,0,0.026907199621200563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,48,128,1,float16,fp8,0,0.026796799898147584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,48,128,1,fp8,fp8,0,0.026851201057434083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,1,128,1,float16,float16,0,0.022732800245285033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,1,128,1,float16,fp8,0,0.020764799416065217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,1,128,1,fp8,fp8,0,0.02067199945449829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,2,128,1,float16,float16,0,0.022176000475883483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,2,128,1,fp8,fp8,0,0.020742399990558623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,48,4,128,1,fp8,fp8,0,0.16139999628067017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,4,128,1,float16,float16,0,0.022697600722312927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,4,128,1,float16,fp8,0,0.020759999752044678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,4,128,1,fp8,fp8,0,0.020670400559902193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,8,128,1,float16,float16,0,0.022830399870872497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,8,128,1,float16,fp8,0,0.020644800364971162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,8,128,1,fp8,fp8,0,0.022595199942588805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,48,128,1,float16,fp8,0,0.01866399943828583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,48,128,1,fp8,fp8,0,0.018503999710083006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,1,128,1,float16,float16,0,0.015961599349975587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,1,128,1,float16,fp8,0,0.014638400077819825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,1,128,1,fp8,fp8,0,0.014723199605941772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,2,128,1,float16,float16,0,0.01640319973230362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,2,128,1,float16,fp8,0,0.01467519998550415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,2,128,1,fp8,fp8,0,0.014542399346828461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,4,128,1,float16,float16,0,0.014843200147151948
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,4,128,1,float16,fp8,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,4,128,1,fp8,fp8,0,0.01467519998550415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,8,128,1,float16,float16,0,0.01655679941177368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,8,128,1,float16,fp8,0,0.014772799611091614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,8,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,48,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,48,128,1,float16,fp8,0,0.014567999541759491
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,48,128,1,fp8,fp8,0,0.014894400537014008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,1,128,1,float16,float16,0,0.014572800695896148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,1,128,1,float16,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,1,128,1,fp8,fp8,0,0.014510400593280792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,48,2,128,1,float16,fp8,0,0.020735999941825865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,2,128,1,float16,float16,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,2,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,2,128,1,fp8,fp8,0,0.014396800100803376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,4,128,1,float16,float16,0,0.014519999921321868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,4,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,4,128,1,fp8,fp8,0,0.014534400403499603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,8,128,1,float16,float16,0,0.014584000408649444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,8,128,1,float16,fp8,0,0.014475199580192565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,48,8,128,1,fp8,fp8,0,0.014511999487876893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,48,48,128,1,float16,float16,0,0.016641600430011748
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,1,128,1,float16,float16,0,0.35182240009307864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,1,128,1,float16,fp8,0,0.3626271963119507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,1,128,1,fp8,fp8,0,0.36231040954589844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,2,128,1,float16,float16,0,0.3525424003601074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,2,128,1,float16,fp8,0,0.36250879764556887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,2,128,1,fp8,fp8,0,0.36110079288482666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,4,128,1,float16,float16,0,0.3608448028564453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,4,128,1,float16,fp8,0,0.36234560012817385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,4,128,1,fp8,fp8,0,0.3610368013381958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,8,128,1,float16,float16,0,0.3789407968521118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,48,128,1,float16,float16,0,0.2899456024169922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,8,128,1,fp8,fp8,0,0.36244161128997804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,48,8,128,1,float16,fp8,0,0.36107840538024905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,1,128,1,float16,float16,0,0.18250720500946044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,48,128,1,float16,fp8,0,0.2697088003158569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,48,128,1,fp8,fp8,0,0.2691391944885254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,1,128,1,float16,fp8,0,0.1866655945777893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,1,128,1,fp8,fp8,0,0.18786560297012328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,2,128,1,float16,float16,0,0.18297599554061889
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,2,128,1,float16,fp8,0,0.18660639524459838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,2,128,1,fp8,fp8,0,0.1869215965270996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,4,128,1,float16,float16,0,0.18764159679412842
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,4,128,1,float16,fp8,0,0.18661439418792725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,4,128,1,fp8,fp8,0,0.18670079708099366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,8,128,1,float16,float16,0,0.1950271964073181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,8,128,1,float16,fp8,0,0.18717600107192994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,48,8,128,1,fp8,fp8,0,0.18661919832229615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,48,128,1,float16,fp8,0,0.14171520471572877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,1,128,1,float16,float16,0,0.09850080013275146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,48,128,1,fp8,fp8,0,0.14175679683685302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,1,128,1,fp8,fp8,0,0.09845119714736938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,2,128,1,float16,fp8,0,0.09917920231819152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,2,128,1,fp8,fp8,0,0.09942079782485962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,4,128,1,float16,float16,0,0.10059679746627807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,4,128,1,float16,fp8,0,0.09928640127182006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,4,128,1,fp8,fp8,0,0.10010720491409301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,8,128,1,float16,float16,0,0.1046895980834961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,8,128,1,float16,fp8,0,0.10050079822540284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,8,128,1,fp8,fp8,0,0.10007200241088868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,48,128,1,float16,float16,0,0.08204479813575745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,48,128,1,float16,fp8,0,0.07607679963111877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,48,128,1,fp8,fp8,0,0.07624480128288269
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,1,128,1,float16,float16,0,0.05411679744720459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,1,128,1,float16,fp8,0,0.053439998626708986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,1,128,1,fp8,fp8,0,0.05350720286369324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,2,128,1,float16,float16,0,0.05504000186920166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,2,128,1,float16,fp8,0,0.054390400648117065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,48,128,1,float16,float16,0,0.1512752056121826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,2,128,1,fp8,fp8,0,0.054574400186538696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,4,128,1,float16,float16,0,0.05546879768371582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,4,128,1,float16,fp8,0,0.053566402196884154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,4,128,1,fp8,fp8,0,0.053513598442077634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,8,128,1,float16,float16,0,0.05825600028038025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,2,128,1,float16,float16,0,0.09855679869651794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,8,128,1,float16,fp8,0,0.055353599786758426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,48,8,128,1,fp8,fp8,0,0.054743999242782594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,48,128,1,float16,float16,0,0.04803839921951294
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,48,128,1,float16,fp8,0,0.043329599499702456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,48,128,1,fp8,fp8,0,0.043115198612213135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,1,128,1,float16,fp8,0,0.03283840119838714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,1,128,1,fp8,fp8,0,0.03275040090084076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,2,128,1,float16,float16,0,0.03296320140361786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,2,128,1,float16,fp8,0,0.03296160101890564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,2,128,1,fp8,fp8,0,0.03294720053672791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,4,128,1,float16,float16,0,0.03293760120868683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,4,128,1,float16,fp8,0,0.03288959860801697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,4,128,1,fp8,fp8,0,0.0329263985157013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,8,128,1,float16,float16,0,0.03359839916229248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,8,128,1,float16,fp8,0,0.032953599095344545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,8,128,1,fp8,fp8,0,0.032793599367141726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,48,128,1,float16,float16,0,0.02300959974527359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,48,128,1,float16,fp8,0,0.024828800559043886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,48,128,1,fp8,fp8,0,0.024859200417995452
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,1,128,1,float16,float16,0,0.020695999264717102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,1,128,1,float16,fp8,0,0.02064319998025894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,1,128,1,fp8,fp8,0,0.020667199790477753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,2,128,1,float16,float16,0,0.020633600652217865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,2,128,1,float16,fp8,0,0.020656000077724456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,2,128,1,fp8,fp8,0,0.02067680060863495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,4,128,1,float16,float16,0,0.02070239931344986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,48,1,128,1,float16,fp8,0,0.09906399846076966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,4,128,1,float16,fp8,0,0.020678399503231047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,4,128,1,fp8,fp8,0,0.020628799498081208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,8,128,1,float16,fp8,0,0.0206496000289917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,8,128,1,fp8,fp8,0,0.020768000185489653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,48,128,1,float16,float16,0,0.018566399812698364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,48,128,1,float16,fp8,0,0.01863519996404648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,48,128,1,fp8,fp8,0,0.018593600392341612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,1,128,1,float16,float16,0,0.01653759926557541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,1,128,1,float16,fp8,0,0.01571040004491806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,1,128,1,fp8,fp8,0,0.015736000239849092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,2,128,1,float16,float16,0,0.0164560005068779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,2,128,1,float16,fp8,0,0.01607840061187744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,2,128,1,fp8,fp8,0,0.016459199786186218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,4,128,1,float16,float16,0,0.016470399498939515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,4,128,1,float16,fp8,0,0.01653600037097931
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,4,128,1,fp8,fp8,0,0.01658080071210861
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,8,128,1,float16,float16,0,0.016510400176048278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,8,128,1,float16,fp8,0,0.016371199488639833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,48,8,128,1,fp8,fp8,0,0.016335999965667723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,48,128,1,float16,float16,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,48,128,1,float16,fp8,0,0.012606400251388549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,48,128,1,fp8,fp8,0,0.012532800436019897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,1,128,1,float16,float16,0,0.012614400684833526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,1,128,1,float16,fp8,0,0.012558400630950928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,1,128,1,fp8,fp8,0,0.011846400052309036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,2,128,1,float16,float16,0,0.012408000230789185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,2,128,1,float16,fp8,0,0.011641599982976914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,2,128,1,fp8,fp8,0,0.012380799651145935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,4,128,1,float16,float16,0,0.012319999933242797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,48,8,128,1,float16,float16,0,0.020761600136756896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,4,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,48,1,128,1,float16,float16,0,0.032948800921440126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,8,128,1,float16,fp8,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,8,128,1,fp8,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,48,128,1,float16,float16,0,0.012465599924325943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,48,128,1,float16,fp8,0,0.012359999865293504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,48,128,1,fp8,fp8,0,0.012433599680662155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,1,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,1,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,1,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,2,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,2,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,2,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,4,128,1,float16,float16,0,0.01231359988451004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,4,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,8,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,8,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,8,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,1,128,1,float16,float16,0,0.2723680019378662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,1,128,1,float16,fp8,0,0.2726351976394653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,1,128,1,fp8,fp8,0,0.2726448059082031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,2,128,1,float16,float16,0,0.27268960475921633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,2,128,1,float16,fp8,0,0.2724639892578125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,4,128,1,float16,fp8,0,0.012390399724245072
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,48,8,128,1,float16,float16,0,0.012427199631929398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,2,128,1,fp8,fp8,0,0.2726095914840698
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,4,128,1,float16,float16,0,0.27779040336608884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,4,128,1,float16,fp8,0,0.2727247953414917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,4,128,1,fp8,fp8,0,0.27215359210968015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,8,128,1,float16,float16,0,0.28534080982208254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,8,128,1,float16,fp8,0,0.27257599830627444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,48,8,128,1,fp8,fp8,0,0.27222399711608886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,48,4,128,1,fp8,fp8,0,0.010812799632549285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,48,128,1,float16,float16,0,0.1933791995048523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,48,128,1,float16,fp8,0,0.18271199464797974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,1,128,1,float16,float16,0,0.14344160556793212
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,48,128,1,fp8,fp8,0,0.18255200386047363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,1,128,1,fp8,fp8,0,0.14177279472351073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,2,128,1,float16,float16,0,0.14353280067443847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,2,128,1,float16,fp8,0,0.1414479970932007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,2,128,1,fp8,fp8,0,0.1418544054031372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,4,128,1,float16,float16,0,0.1455423951148987
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,4,128,1,float16,fp8,0,0.1414639949798584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,4,128,1,fp8,fp8,0,0.14168319702148438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,8,128,1,float16,float16,0,0.1496880054473877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,8,128,1,float16,fp8,0,0.1415120005607605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,8,128,1,fp8,fp8,0,0.14162399768829345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,48,128,1,float16,float16,0,0.10228960514068604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,48,128,1,float16,fp8,0,0.09649919867515563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,48,128,1,fp8,fp8,0,0.09656479954719543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,1,128,1,float16,float16,0,0.076145601272583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,1,128,1,float16,fp8,0,0.07585279941558838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,1,128,1,fp8,fp8,0,0.07441440224647522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,2,128,1,float16,float16,0,0.07692480087280273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,2,128,1,float16,fp8,0,0.07406719923019409
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,2,128,1,fp8,fp8,0,0.0755504012107849
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,4,128,1,float16,float16,0,0.0780463993549347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,4,128,1,float16,fp8,0,0.07593280076980591
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,4,128,1,fp8,fp8,0,0.07407039999961854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,8,128,1,float16,float16,0,0.08008800148963928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,8,128,1,float16,fp8,0,0.07597439885139465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,48,8,128,1,fp8,fp8,0,0.07604960203170777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,48,128,1,float16,float16,0,0.0576960027217865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,48,128,1,float16,fp8,0,0.05341920256614685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,48,128,1,fp8,fp8,0,0.05342400074005127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,1,128,1,float16,float16,0,0.04315359890460968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,1,128,1,float16,fp8,0,0.041536000370979306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,1,128,1,fp8,fp8,0,0.043110400438308716
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,2,128,1,float16,float16,0,0.04320639967918396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,2,128,1,float16,fp8,0,0.0430864006280899
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,4,128,1,float16,float16,0,0.04323199987411499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,4,128,1,float16,fp8,0,0.04315040111541748
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,4,128,1,fp8,fp8,0,0.04181919991970062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,48,1,128,1,float16,fp8,0,0.14145760536193847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,8,128,1,float16,float16,0,0.045265600085258484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,8,128,1,float16,fp8,0,0.04200479984283447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,48,128,1,float16,float16,0,0.030868801474571227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,48,128,1,float16,fp8,0,0.03092319965362549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,48,128,1,fp8,fp8,0,0.03097440004348755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,1,128,1,float16,float16,0,0.026897600293159483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,1,128,1,float16,fp8,0,0.026704001426696777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,1,128,1,fp8,fp8,0,0.026811200380325317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,2,128,1,float16,float16,0,0.026895999908447266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,2,128,1,float16,fp8,0,0.026830399036407472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,2,128,1,fp8,fp8,0,0.02675839960575104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,4,128,1,float16,float16,0,0.026878398656845093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,4,128,1,float16,fp8,0,0.026446399092674256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,4,128,1,fp8,fp8,0,0.02611680030822754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,8,128,1,float16,float16,0,0.02687999904155731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,8,128,1,float16,fp8,0,0.026759999990463256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,48,8,128,1,fp8,fp8,0,0.02672159969806671
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,48,128,1,float16,float16,0,0.019169600307941438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,48,128,1,float16,fp8,0,0.020640000700950623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,48,128,1,fp8,fp8,0,0.018748800456523895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,2,128,1,fp8,fp8,0,0.04257439970970154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,1,128,1,float16,float16,0,0.016777600347995757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,1,128,1,float16,fp8,0,0.01658719927072525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,1,128,1,fp8,fp8,0,0.01672479957342148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,2,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,2,128,1,float16,fp8,0,0.01669439971446991
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,2,128,1,fp8,fp8,0,0.016542400419712066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,4,128,1,float16,float16,0,0.01674560010433197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,4,128,1,float16,fp8,0,0.01656000018119812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,4,128,1,fp8,fp8,0,0.01671839952468872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,8,128,1,float16,float16,0,0.018614399433135986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,8,128,1,float16,fp8,0,0.016582399606704712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,48,8,128,1,fp8,fp8,0,0.016771200299263
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,48,128,1,float16,fp8,0,0.014521600306034088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,48,128,1,fp8,fp8,0,0.014711999893188476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,1,128,1,float16,fp8,0,0.014396800100803376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,1,128,1,fp8,fp8,0,0.014476799964904785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,2,128,1,float16,float16,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,2,128,1,float16,fp8,0,0.014406399428844452
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,2,128,1,fp8,fp8,0,0.014606399834156037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,4,128,1,float16,float16,0,0.01451520025730133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,4,128,1,float16,fp8,0,0.01448799967765808
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,4,128,1,fp8,fp8,0,0.014455999433994293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,8,128,1,float16,float16,0,0.014667199552059173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,8,128,1,float16,fp8,0,0.013451200723648072
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,8,128,1,fp8,fp8,0,0.013555200397968292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,48,128,1,float16,float16,0,0.012535999715328216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,48,128,1,float16,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,48,128,1,fp8,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,1,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,1,128,1,float16,fp8,0,0.010598400235176086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,1,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,2,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,2,128,1,float16,fp8,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,2,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,4,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,4,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,4,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,48,8,128,1,fp8,fp8,0,0.04216960072517395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,8,128,1,float16,fp8,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,48,128,1,float16,float16,0,0.014593599736690522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,8,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,48,128,1,float16,float16,0,0.011115200072526931
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,48,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,48,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,1,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,1,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,1,128,1,fp8,fp8,0,0.010305599868297577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,2,128,1,float16,float16,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,2,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,2,128,1,fp8,fp8,0,0.010367999970912933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,4,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,4,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,4,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,8,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,8,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,48,8,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,1,128,1,float16,float16,0,0.2351151943206787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,1,128,1,float16,fp8,0,0.22830560207366943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,1,128,1,fp8,fp8,0,0.22920799255371094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,2,128,1,float16,float16,0,0.23560960292816163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,2,128,1,float16,fp8,0,0.22906239032745362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,48,8,128,1,float16,float16,0,0.010567999631166457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,2,128,1,fp8,fp8,0,0.22901599407196044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,48,1,128,1,float16,float16,0,0.01488959938287735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,4,128,1,float16,float16,0,0.2360304117202759
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,4,128,1,float16,fp8,0,0.2292799949645996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,4,128,1,fp8,fp8,0,0.22777121067047118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,8,128,1,float16,fp8,0,0.22893280982971193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,48,128,1,float16,float16,0,0.14751839637756348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,8,128,1,fp8,fp8,0,0.22772159576416015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,48,128,1,float16,fp8,0,0.1397055983543396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,48,128,1,fp8,fp8,0,0.13953280448913574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,1,128,1,float16,float16,0,0.12310880422592163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,1,128,1,float16,fp8,0,0.1170240044593811
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,1,128,1,fp8,fp8,0,0.11787680387496949
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,2,128,1,float16,float16,0,0.12294559478759766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,2,128,1,float16,fp8,0,0.11815520524978637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,2,128,1,fp8,fp8,0,0.11683839559555054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,4,128,1,float16,float16,0,0.12433600425720215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,4,128,1,float16,fp8,0,0.11692320108413697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,4,128,1,fp8,fp8,0,0.11708320379257202
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,8,128,1,float16,float16,0,0.12546240091323851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,8,128,1,float16,fp8,0,0.11896320581436157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,48,8,128,1,fp8,fp8,0,0.11704800128936768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,48,128,1,float16,float16,0,0.07993119955062866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,48,128,1,float16,fp8,0,0.07387199997901917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,48,128,1,fp8,fp8,0,0.073990398645401
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,1,128,1,float16,float16,0,0.06571199893951415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,1,128,1,float16,fp8,0,0.0636672019958496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,1,128,1,fp8,fp8,0,0.06368160247802734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,2,128,1,float16,float16,0,0.06591839790344238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,2,128,1,fp8,fp8,0,0.06366879940032959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,4,128,1,float16,float16,0,0.06578559875488281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,4,128,1,fp8,fp8,0,0.06358879804611206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,8,128,1,float16,float16,0,0.0676144003868103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,48,8,128,1,float16,float16,0,0.23998560905456542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,8,128,1,float16,fp8,0,0.06374559998512268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,8,128,1,fp8,fp8,0,0.06365919709205628
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,48,128,1,float16,float16,0,0.04147520065307617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,48,128,1,float16,fp8,0,0.04124639928340912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,48,128,1,fp8,fp8,0,0.0412559986114502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,1,128,1,float16,float16,0,0.03714239895343781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,1,128,1,fp8,fp8,0,0.03697279989719391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,2,128,1,float16,float16,0,0.0372079998254776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,2,128,1,float16,fp8,0,0.03708159923553467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,2,128,1,fp8,fp8,0,0.036881598830223086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,4,128,1,float16,float16,0,0.03722400069236755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,4,128,1,float16,fp8,0,0.03702560067176819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,4,128,1,fp8,fp8,0,0.03707840144634247
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,8,128,1,float16,float16,0,0.039124798774719236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,8,128,1,float16,fp8,0,0.03707039952278137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,8,128,1,fp8,fp8,0,0.03700959980487824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,48,128,1,float16,float16,0,0.0256415992975235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,48,128,1,float16,fp8,0,0.024934400618076325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,48,128,1,fp8,fp8,0,0.0249551996588707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,2,128,1,float16,fp8,0,0.06362879872322083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,1,128,1,float16,float16,0,0.02468640059232712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,1,128,1,float16,fp8,0,0.022758400440216063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,1,128,1,fp8,fp8,0,0.022860799729824067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,48,4,128,1,float16,fp8,0,0.06363360285758972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,2,128,1,float16,float16,0,0.024639999866485594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,2,128,1,float16,fp8,0,0.022856000065803527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,2,128,1,fp8,fp8,0,0.022697600722312927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,4,128,1,float16,float16,0,0.02466239929199219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,4,128,1,float16,fp8,0,0.022833600640296936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,4,128,1,fp8,fp8,0,0.02273920029401779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,8,128,1,float16,float16,0,0.02467840015888214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,8,128,1,float16,fp8,0,0.022865599393844603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,48,8,128,1,fp8,fp8,0,0.02268799990415573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,48,128,1,float16,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,48,128,1,fp8,fp8,0,0.01655679941177368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,1,128,1,float16,float16,0,0.017084799706935883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,1,128,1,float16,fp8,0,0.01642719954252243
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,1,128,1,fp8,fp8,0,0.01520639955997467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,2,128,1,float16,fp8,0,0.015462400019168853
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,2,128,1,fp8,fp8,0,0.015647999942302704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,4,128,1,float16,float16,0,0.016539199650287627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,48,1,128,1,float16,fp8,0,0.0352512001991272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,4,128,1,float16,fp8,0,0.01648000031709671
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,4,128,1,fp8,fp8,0,0.014579200744628906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,8,128,1,float16,float16,0,0.01659359931945801
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,8,128,1,float16,fp8,0,0.014585599303245544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,8,128,1,fp8,fp8,0,0.0165120005607605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,48,128,1,float16,float16,0,0.014593599736690522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,48,128,1,float16,fp8,0,0.01249919980764389
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,48,128,1,fp8,fp8,0,0.01252480000257492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,1,128,1,float16,float16,0,0.013035200536251068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,1,128,1,float16,fp8,0,0.012476799637079239
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,1,128,1,fp8,fp8,0,0.012676799297332763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,2,128,1,float16,float16,0,0.012459199875593185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,2,128,1,float16,fp8,0,0.012582400441169738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,2,128,1,fp8,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,4,128,1,float16,float16,0,0.012518399953842163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,4,128,1,float16,fp8,0,0.012539200484752655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,4,128,1,fp8,fp8,0,0.012595200538635254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,8,128,1,float16,float16,0,0.012544000148773193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,8,128,1,float16,fp8,0,0.012563200294971466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,48,8,128,1,fp8,fp8,0,0.012646399438381195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,48,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,48,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,48,128,1,fp8,fp8,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,1,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,1,128,1,float16,fp8,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,1,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,48,128,1,float16,float16,0,0.018534399569034576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,2,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,2,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,2,128,1,fp8,fp8,0,0.010340800136327743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,4,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,4,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,48,2,128,1,float16,float16,0,0.01658399999141693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,4,128,1,fp8,fp8,0,0.010283199697732925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,8,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,8,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,48,8,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,48,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,48,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,1,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,1,128,1,float16,fp8,0,0.009139200299978256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,2,128,1,float16,float16,0,0.010351999849081039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,2,128,1,float16,fp8,0,0.00931679978966713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,4,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,4,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,4,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,8,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,8,128,1,float16,fp8,0,0.009105599671602248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,8,128,1,fp8,fp8,0,0.00904799997806549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,1,128,1,float16,float16,0,0.22182559967041016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,1,128,1,float16,fp8,0,0.20929598808288574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,1,128,1,fp8,fp8,0,0.2094559907913208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,2,128,1,float16,float16,0,0.22243199348449708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,2,128,1,float16,fp8,0,0.2092895984649658
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,2,128,1,fp8,fp8,0,0.21091840267181397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,48,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,4,128,1,float16,float16,0,0.2257823944091797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,4,128,1,float16,fp8,0,0.2111407995223999
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,48,1,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,4,128,1,fp8,fp8,0,0.21132159233093262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,8,128,1,float16,fp8,0,0.2111664056777954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,8,128,1,float16,float16,0,0.2278592109680176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,48,8,128,1,fp8,fp8,0,0.21043360233306885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,48,128,1,float16,float16,0,0.1297711968421936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,48,128,1,float16,fp8,0,0.1209712028503418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,48,128,1,fp8,fp8,0,0.12102400064468384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,1,128,1,float16,float16,0,0.11501760482788086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,1,128,1,float16,fp8,0,0.10909119844436646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,1,128,1,fp8,fp8,0,0.10889279842376709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,2,128,1,float16,float16,0,0.11687840223312378
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,2,128,1,float16,fp8,0,0.10893440246582031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,2,128,1,fp8,fp8,0,0.10936319828033447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,4,128,1,float16,float16,0,0.1167199969291687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,4,128,1,float16,fp8,0,0.10872000455856323
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,4,128,1,fp8,fp8,0,0.10888799428939819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,8,128,1,float16,float16,0,0.11703039407730102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,8,128,1,float16,fp8,0,0.10891519784927368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,48,8,128,1,fp8,fp8,0,0.10869760513305664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,48,128,1,float16,fp8,0,0.0637391984462738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,48,128,1,fp8,fp8,0,0.0636575996875763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,1,128,1,float16,float16,0,0.06244000196456909
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,1,128,1,fp8,fp8,0,0.059350401163101196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,2,128,1,float16,float16,0,0.06293280124664306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,2,128,1,float16,fp8,0,0.05964959859848022
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,2,128,1,fp8,fp8,0,0.05951039791107178
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,4,128,1,float16,float16,0,0.0616208016872406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,4,128,1,float16,fp8,0,0.059592002630233766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,4,128,1,fp8,fp8,0,0.0593999981880188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,8,128,1,float16,float16,0,0.0636672019958496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,8,128,1,float16,fp8,0,0.059280002117156984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,8,128,1,fp8,fp8,0,0.05949919819831848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,48,128,1,float16,float16,0,0.037028801441192624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,48,128,1,float16,fp8,0,0.037057599425315856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,48,128,1,fp8,fp8,0,0.03704800009727478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,1,128,1,float16,float16,0,0.035041600465774536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,1,128,1,float16,fp8,0,0.03379839956760407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,1,128,1,fp8,fp8,0,0.03469600081443787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,2,128,1,float16,float16,0,0.03510560095310211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,2,128,1,float16,fp8,0,0.034913599491119385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,2,128,1,fp8,fp8,0,0.03495840132236481
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,4,128,1,float16,float16,0,0.0350735992193222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,4,128,1,float16,fp8,0,0.03494240045547485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,4,128,1,fp8,fp8,0,0.03490560054779053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,8,128,1,float16,float16,0,0.0355536013841629
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,8,128,1,float16,fp8,0,0.034561601281166074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,48,128,1,float16,float16,0,0.0247311994433403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,48,128,1,float16,fp8,0,0.022697600722312927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,48,128,1,fp8,fp8,0,0.022731199860572815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,1,128,1,float16,float16,0,0.022672000527381896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,1,128,1,float16,fp8,0,0.02072319984436035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,1,128,1,fp8,fp8,0,0.02229280024766922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,2,128,1,float16,float16,0,0.02268799990415573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,2,128,1,float16,fp8,0,0.022360000014305114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,48,128,1,float16,float16,0,0.0675711989402771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,2,128,1,fp8,fp8,0,0.022707200050354003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,48,1,128,1,float16,fp8,0,0.05962399840354919
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,4,128,1,float16,float16,0,0.022753599286079406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,4,128,1,float16,fp8,0,0.022625599801540375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,4,128,1,fp8,fp8,0,0.022601599991321563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,8,128,1,float16,float16,0,0.022809599339962006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,8,128,1,float16,fp8,0,0.022657600045204163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,48,128,1,float16,float16,0,0.01671680063009262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,48,128,1,float16,fp8,0,0.016521599888801575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,48,128,1,fp8,fp8,0,0.01658080071210861
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,1,128,1,float16,float16,0,0.0165583997964859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,1,128,1,float16,fp8,0,0.014587199687957764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,1,128,1,fp8,fp8,0,0.014640000462532044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,2,128,1,float16,float16,0,0.016540800034999848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,2,128,1,float16,fp8,0,0.014596800506114959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,2,128,1,fp8,fp8,0,0.01462240070104599
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,4,128,1,float16,float16,0,0.015846399962902068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,4,128,1,float16,fp8,0,0.014630399644374847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,48,8,128,1,fp8,fp8,0,0.0333840012550354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,4,128,1,fp8,fp8,0,0.014710399508476257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,8,128,1,float16,float16,0,0.014633600413799287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,8,128,1,float16,fp8,0,0.01462559998035431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,48,8,128,1,fp8,fp8,0,0.014523200690746307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,48,128,1,float16,float16,0,0.014460800588130951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,48,128,1,float16,fp8,0,0.012532800436019897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,48,128,1,fp8,fp8,0,0.012542399764060973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,1,128,1,float16,float16,0,0.012569600343704223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,1,128,1,float16,fp8,0,0.012428800016641617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,1,128,1,fp8,fp8,0,0.012444800138473511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,2,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,2,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,2,128,1,fp8,fp8,0,0.012647999823093415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,4,128,1,float16,float16,0,0.012491200119256973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,4,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,4,128,1,fp8,fp8,0,0.012452799826860428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,8,128,1,float16,float16,0,0.012670400738716125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,8,128,1,float16,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,48,8,128,1,fp8,fp8,0,0.01252480000257492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,48,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,48,128,1,fp8,fp8,0,0.010320000350475311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,48,8,128,1,fp8,fp8,0,0.022654399275779724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,1,128,1,float16,float16,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,1,128,1,float16,fp8,0,0.010337600111961364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,2,128,1,float16,float16,0,0.010334400087594986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,2,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,2,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,4,128,1,float16,float16,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,4,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,4,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,8,128,1,float16,float16,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,8,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,8,128,1,fp8,fp8,0,0.010302399843931198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,48,128,1,float16,float16,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,48,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,1,128,1,float16,float16,0,0.008764799684286118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,1,128,1,float16,fp8,0,0.008579199761152267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,1,128,1,fp8,fp8,0,0.008603200316429138
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,2,128,1,float16,float16,0,0.008667200058698653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,2,128,1,float16,fp8,0,0.00856959968805313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,2,128,1,fp8,fp8,0,0.008544000238180161
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,4,128,1,float16,float16,0,0.009520000219345093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,4,128,1,float16,fp8,0,0.008568000048398972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,48,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,4,128,1,fp8,fp8,0,0.009319999814033508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,8,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,8,128,1,float16,fp8,0,0.009292799979448318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,8,128,1,fp8,fp8,0,0.008478400111198426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,48,1,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,1,128,1,float16,fp8,0,0.204367995262146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,1,128,1,float16,float16,0,0.22175359725952148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,1,128,1,fp8,fp8,0,0.20300960540771484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,2,128,1,float16,float16,0,0.2194672107696533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,2,128,1,float16,fp8,0,0.2047951936721802
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,2,128,1,fp8,fp8,0,0.20320639610290528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,4,128,1,float16,float16,0,0.2193552017211914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,4,128,1,float16,fp8,0,0.20444159507751464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,4,128,1,fp8,fp8,0,0.20352799892425538
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,8,128,1,float16,float16,0,0.21951839923858643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,8,128,1,float16,fp8,0,0.2045072078704834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,0,0.11482880115509034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,0,0.10686719417572021
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,48,128,1,fp8,fp8,0,0.106769597530365
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,48,48,128,1,fp8,fp8,0,0.01034879982471466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,1,128,1,float16,float16,0,0.11497279405593872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,1,128,1,float16,fp8,0,0.10667840242385865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,2,128,1,float16,float16,0,0.11500320434570313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,2,128,1,float16,fp8,0,0.1068160057067871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,2,128,1,fp8,fp8,0,0.10659840106964111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,4,128,1,float16,float16,0,0.115009605884552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,4,128,1,float16,fp8,0,0.10665600299835205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,4,128,1,fp8,fp8,0,0.1068079948425293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,8,128,1,float16,float16,0,0.11488159894943237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,8,128,1,float16,fp8,0,0.10681760311126709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,8,128,1,fp8,fp8,0,0.10659999847412109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,0,0.05759040117263794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,48,128,1,fp8,fp8,0,0.05756000280380249
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,1,128,1,float16,float16,0,0.061622399091720584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,1,128,1,float16,fp8,0,0.057520002126693726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,1,128,1,fp8,fp8,0,0.05758399963378906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,48,8,128,1,fp8,fp8,0,0.2043936014175415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,2,128,1,float16,float16,0,0.06267520189285278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,48,1,128,1,fp8,fp8,0,0.10681120157241822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,2,128,1,float16,fp8,0,0.057678401470184326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,2,128,1,fp8,fp8,0,0.05760480165481567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,4,128,1,float16,float16,0,0.0625935971736908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,4,128,1,float16,fp8,0,0.057576000690460205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,4,128,1,fp8,fp8,0,0.05760480165481567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,8,128,1,float16,float16,0,0.062084800004959105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,8,128,1,float16,fp8,0,0.05761439800262451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,8,128,1,fp8,fp8,0,0.05764639973640442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,0,0.037041598558425905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,0,0.03376159965991974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,48,128,1,fp8,fp8,0,0.03319360017776489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,1,128,1,float16,float16,0,0.03512639999389648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,0,0.06363679766654969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,1,128,1,float16,fp8,0,0.033076798915863036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,1,128,1,fp8,fp8,0,0.034692800045013426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,2,128,1,float16,float16,0,0.03503359854221344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,2,128,1,float16,fp8,0,0.03304960131645203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,2,128,1,fp8,fp8,0,0.034771201014518735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,4,128,1,float16,float16,0,0.03502239882946014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,4,128,1,float16,fp8,0,0.033046400547027587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,4,128,1,fp8,fp8,0,0.034756800532341
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,8,128,1,float16,fp8,0,0.03312000036239624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,8,128,1,fp8,fp8,0,0.03319680094718933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,0,0.023814399540424348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,0,0.021675199270248413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,48,128,1,fp8,fp8,0,0.02269600033760071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,1,128,1,float16,fp8,0,0.021865600347518922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,1,128,1,fp8,fp8,0,0.021899199485778807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,2,128,1,float16,float16,0,0.022806400060653688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,2,128,1,float16,fp8,0,0.022140799462795256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,2,128,1,fp8,fp8,0,0.022342400252819063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,4,128,1,float16,float16,0,0.022838400304317476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,4,128,1,float16,fp8,0,0.02072480022907257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,4,128,1,fp8,fp8,0,0.020744000375270844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,8,128,1,float16,float16,0,0.022617599368095397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,8,128,1,float16,fp8,0,0.020776000618934632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,8,128,1,fp8,fp8,0,0.02085600048303604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,0,0.014699199795722961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,48,128,1,fp8,fp8,0,0.014617599546909332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,1,128,1,float16,float16,0,0.016468800604343414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,1,128,1,fp8,fp8,0,0.014688000082969666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,2,128,1,float16,float16,0,0.014633600413799287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,2,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,2,128,1,fp8,fp8,0,0.014752000570297241
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,48,8,128,1,float16,float16,0,0.035102400183677676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,4,128,1,float16,float16,0,0.015044799447059632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,4,128,1,float16,fp8,0,0.014550399780273438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,4,128,1,fp8,fp8,0,0.014638400077819825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,8,128,1,float16,float16,0,0.015318399667739869
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,8,128,1,fp8,fp8,0,0.014555199444293976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,8,128,1,float16,fp8,0,0.014655999839305878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,0,0.01363680064678192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,0,0.012566399574279786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,48,128,1,fp8,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,1,128,1,float16,float16,0,0.012622399628162384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,1,128,1,float16,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,2,128,1,float16,float16,0,0.01255040019750595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,2,128,1,float16,fp8,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,2,128,1,fp8,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,4,128,1,float16,float16,0,0.012452799826860428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,4,128,1,float16,fp8,0,0.012899200618267059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,4,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,8,128,1,float16,float16,0,0.012452799826860428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,8,128,1,float16,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,8,128,1,fp8,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,48,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,1,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,1,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,48,1,128,1,float16,fp8,0,0.014659200608730317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,1,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,2,128,1,float16,float16,0,0.010620799660682679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,2,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,2,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,4,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,4,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,4,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,8,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,8,128,1,float16,fp8,0,0.009428799897432328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,48,8,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,0,0.009808000177145004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,48,128,1,fp8,fp8,0,0.00846560001373291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,1,128,1,float16,float16,0,0.010356800258159637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,1,128,1,float16,fp8,0,0.008497600257396699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,1,128,1,fp8,fp8,0,0.010358399897813796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,2,128,1,float16,float16,0,0.009761600196361542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,2,128,1,float16,fp8,0,0.010300800204277039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,2,128,1,fp8,fp8,0,0.008387199789285659
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,4,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,4,128,1,float16,fp8,0,0.008448000252246856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,4,128,1,fp8,fp8,0,0.01032480001449585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,8,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,8,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,48,8,128,1,fp8,fp8,0,0.009172800183296203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,48,1,128,1,fp8,fp8,0,0.012555199861526489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,48,1,128,1,float16,float16,0,0.022697600722312927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,fp8,0,9.829307556152344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,float16,0,13.028822326660157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,1,128,1,fp8,fp8,0,9.93515396118164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,float16,0,13.105772399902344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,fp8,0,9.875326538085938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,2,128,1,fp8,fp8,0,9.857421112060546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,float16,0,13.526448059082032
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,fp8,0,9.837680053710937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,4,128,1,fp8,fp8,0,10.143743896484375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,float16,0,13.86754150390625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,fp8,0,10.305487823486327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,40,8,128,1,fp8,fp8,0,10.011283111572265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,fp8,0,5.1926830291748045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,40,128,1,fp8,fp8,0,5.262369537353516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,float16,0,6.531903839111328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,fp8,0,4.8917793273925785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,1,128,1,fp8,fp8,0,4.985718536376953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,float16,0,6.688942718505859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,fp8,0,4.92010726928711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,2,128,1,fp8,fp8,0,5.021575927734375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,float16,0,6.8135826110839846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,fp8,0,4.934659194946289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,4,128,1,fp8,fp8,0,5.043430328369141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,fp8,0,5.050444793701172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,8,128,1,fp8,fp8,0,5.036145782470703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,float16,0,6.722555541992188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,fp8,0,2.7375568389892577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,40,128,1,fp8,fp8,0,2.7774959564208985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,fp8,0,2.4754655838012694
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,float16,0,3.338937759399414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,1,128,1,fp8,fp8,0,2.522587203979492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,float16,0,3.468812942504883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,fp8,0,2.4923984527587892
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,2,128,1,fp8,fp8,0,2.7242656707763673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,float16,0,3.081478309631348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,fp8,0,2.50555362701416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,4,128,1,fp8,fp8,0,2.7179887771606444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,float16,0,3.1418384552001952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,fp8,0,2.4974672317504885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,8,128,1,fp8,fp8,0,2.7937200546264647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,fp8,0,1.391702365875244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,40,128,1,fp8,fp8,0,1.374465560913086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,float16,0,1.5882783889770509
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,fp8,0,1.5165439605712892
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,1,128,1,fp8,fp8,0,1.3064847946166993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,float16,0,1.6156816482543945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,fp8,0,1.487343978881836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,2,128,1,fp8,fp8,0,1.3684080123901368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,float16,0,1.6099071502685547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,fp8,0,1.380769634246826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,4,128,1,fp8,fp8,0,1.3100624084472656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,float16,0,1.6146703720092774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,fp8,0,1.3390000343322754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,8,128,1,fp8,fp8,0,1.3042431831359864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,float16,0,7.7938995361328125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,fp8,0,5.864440155029297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,1,128,1,fp8,fp8,0,5.875985717773437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,float16,0,7.848843383789062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,fp8,0,5.854971313476563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,2,128,1,fp8,fp8,0,5.870038223266602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,float16,0,7.951292419433594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,fp8,0,5.859972763061523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,4,128,1,fp8,fp8,0,5.919404983520508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,float16,0,8.059553527832032
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,fp8,0,5.884004974365235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,40,8,128,1,fp8,fp8,0,5.89715347290039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,float16,0,3.2117889404296873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,float16,0,1.650067138671875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,float16,0,3.9206001281738283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,float16,0,6.751214599609375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,fp8,0,3.0822351455688475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,40,128,1,fp8,fp8,0,3.213644790649414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,float16,0,3.7194942474365233
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,fp8,0,2.939656066894531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,1,128,1,fp8,fp8,0,2.861921691894531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,float16,0,3.516774368286133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,fp8,0,3.0884479522705077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,2,128,1,fp8,fp8,0,2.893937683105469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,float16,0,3.4831409454345703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,fp8,0,2.978019142150879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,4,128,1,fp8,fp8,0,2.932444763183594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,float16,0,3.6418735504150392
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,fp8,0,3.0071216583251954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,float16,0,2.011182403564453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,40,8,128,1,fp8,fp8,0,2.8780431747436523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,fp8,0,1.8409183502197266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,40,128,1,fp8,fp8,0,1.5805904388427734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,fp8,0,1.4675264358520508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,float16,0,1.9153024673461914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,1,128,1,fp8,fp8,0,1.483193588256836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,fp8,0,1.4643648147583008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,float16,0,1.7399263381958008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,2,128,1,fp8,fp8,0,1.6540592193603516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,float16,0,1.7966896057128907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,fp8,0,1.4708239555358886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,4,128,1,fp8,fp8,0,1.6802576065063477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,float16,0,1.8370576858520509
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,fp8,0,1.526036834716797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,float16,0,1.0073087692260743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,40,8,128,1,fp8,fp8,0,1.4688575744628907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,fp8,0,0.8614080429077149
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,40,128,1,fp8,fp8,0,0.8334112167358398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,float16,0,0.9049391746520996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,fp8,0,0.8045760154724121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,1,128,1,fp8,fp8,0,0.7674191951751709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,float16,0,0.9097184181213379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,fp8,0,0.7712575912475585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,2,128,1,fp8,fp8,0,0.7688831806182861
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,fp8,0,0.7715295791625977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,float16,0,0.9140751838684082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,4,128,1,fp8,fp8,0,0.7701183795928955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,fp8,0,0.8493087768554688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,8,128,1,fp8,fp8,0,0.7682352066040039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,float16,0,0.9501664161682128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,fp8,0,4.07258415222168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,float16,0,5.147633743286133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,1,128,1,fp8,fp8,0,4.0869087219238285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,fp8,0,4.080707168579101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,float16,0,5.316697692871093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,2,128,1,fp8,fp8,0,4.199758529663086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,float16,0,5.241993713378906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,fp8,0,4.0995361328125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,4,128,1,fp8,fp8,0,4.218990325927734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,float16,0,5.1390033721923825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,fp8,0,4.31300163269043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,float16,0,2.8760656356811523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,40,8,128,1,fp8,fp8,0,4.091318511962891
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,fp8,0,2.429088020324707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,40,128,1,fp8,fp8,0,2.2514720916748048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,float16,0,2.678071975708008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,1,128,1,fp8,fp8,0,2.0549856185913087
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,float16,0,2.4567792892456053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,2,128,1,fp8,fp8,0,2.0794368743896485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,fp8,0,2.369416046142578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,fp8,0,2.1049280166625977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,float16,0,2.5504159927368164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,fp8,0,2.0590255737304686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,4,128,1,fp8,fp8,0,2.0669839859008787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,float16,0,1.4506735801696777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,float16,0,2.4837104797363283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,fp8,0,2.328118324279785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,40,8,128,1,fp8,fp8,0,2.0795967102050783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,fp8,0,1.3137616157531737
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,40,128,1,fp8,fp8,0,1.3444304466247559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,fp8,0,1.066703987121582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,float16,0,1.260483169555664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,1,128,1,fp8,fp8,0,1.063809585571289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,float16,0,1.2392111778259278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,2,128,1,fp8,fp8,0,1.0947183609008788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,float16,0,1.2942000389099122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,fp8,0,1.1053647994995117
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,4,128,1,fp8,fp8,0,1.0758543968200684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,fp8,0,1.1053791999816895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,float16,0,1.330235195159912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,8,128,1,fp8,fp8,0,1.0616255760192872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,40,128,1,fp8,fp8,0,0.6100448131561279
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,float16,0,0.7450160026550293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,fp8,0,1.0585280418395997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,fp8,0,0.5597104072570801
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,float16,0,0.659830379486084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,1,128,1,fp8,fp8,0,0.5594736099243164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,fp8,0,0.5819712162017823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,float16,0,0.6547904014587402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,2,128,1,fp8,fp8,0,0.5594399929046631
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,fp8,0,0.5602047920227051
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,float16,0,0.6701727867126465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,4,128,1,fp8,fp8,0,0.5591375827789307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,float16,0,0.6681727886199951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,8,128,1,fp8,fp8,0,0.5604144096374511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,fp8,0,0.6857999801635742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,fp8,0,0.5910511970520019
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,fp8,0,5.393361663818359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,1,128,1,fp8,fp8,0,5.462480163574218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,float16,0,6.747652435302735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,float16,0,6.878695678710938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,fp8,0,5.390662384033203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,2,128,1,fp8,fp8,0,5.482519912719726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,float16,0,6.797029113769531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,fp8,0,5.5925647735595705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,4,128,1,fp8,fp8,0,5.4144432067871096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,float16,0,7.231334686279297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,fp8,0,5.4567920684814455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,float16,0,3.7588958740234375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,40,8,128,1,fp8,fp8,0,5.4278911590576175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,fp8,0,3.1923311233520506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,40,128,1,fp8,fp8,0,3.083344078063965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,float16,0,3.538003158569336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,fp8,0,2.72411994934082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,1,128,1,fp8,fp8,0,2.7256336212158203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,float16,0,3.4746734619140627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,fp8,0,2.964211273193359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,2,128,1,fp8,fp8,0,2.7232847213745117
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,float16,0,3.416761779785156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,fp8,0,2.8644479751586913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,4,128,1,fp8,fp8,0,2.7295743942260744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,float16,0,3.4832496643066406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,fp8,0,2.8895055770874025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,float16,0,1.9040607452392577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,40,8,128,1,fp8,fp8,0,2.735686492919922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,fp8,0,1.861012840270996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,40,128,1,fp8,fp8,0,1.5418496131896973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,float16,0,1.6136207580566406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,fp8,0,1.388030433654785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,1,128,1,fp8,fp8,0,1.394155216217041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,float16,0,1.6364784240722656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,fp8,0,1.3908880233764649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,2,128,1,fp8,fp8,0,1.3858783721923829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,float16,0,1.7089199066162108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,fp8,0,1.3849455833435058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,4,128,1,fp8,fp8,0,1.400324821472168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,fp8,0,1.3879584312438964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,float16,0,1.6907888412475587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,40,8,128,1,fp8,fp8,0,1.389681625366211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,float16,0,0.9658528327941894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,fp8,0,0.7943088054656983
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,40,128,1,fp8,fp8,0,0.8616111755371094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,fp8,0,0.7168560028076172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,float16,0,0.8566703796386719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,1,128,1,fp8,fp8,0,0.7174160003662109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,float16,0,0.8348832130432129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,fp8,0,0.7165567874908447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,2,128,1,fp8,fp8,0,0.7896751880645752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,float16,0,0.8490048408508301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,fp8,0,0.7269872188568115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,4,128,1,fp8,fp8,0,0.7175983905792236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,float16,0,0.8972368240356445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,fp8,0,0.763643217086792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,40,8,128,1,fp8,fp8,0,0.7656288146972656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,float16,0,0.514350414276123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,fp8,0,0.4447519779205322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,40,128,1,fp8,fp8,0,0.4486991882324219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,float16,0,0.4464911937713623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,fp8,0,0.38412160873413087
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,1,128,1,fp8,fp8,0,0.40326719284057616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,float16,0,0.44563999176025393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,fp8,0,0.3847935914993286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,2,128,1,fp8,fp8,0,0.3836848020553589
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,float16,0,0.45804481506347655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,fp8,0,0.3871648073196411
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,4,128,1,fp8,fp8,0,0.3849168062210083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,float16,0,0.4492208003997803
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,fp8,0,0.38616480827331545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,40,8,128,1,fp8,fp8,0,0.3854720115661621
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,fp8,0,3.2330928802490235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,1,128,1,fp8,fp8,0,3.2656734466552733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,float16,0,3.883393478393555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,float16,0,3.9242767333984374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,fp8,0,3.2454288482666014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,2,128,1,fp8,fp8,0,3.2619297027587892
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,fp8,0,3.2953617095947267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,float16,0,4.121672058105469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,4,128,1,fp8,fp8,0,3.2358367919921873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,float16,0,4.153750228881836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,8,128,1,fp8,fp8,0,3.2799633026123045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,float16,0,2.2728496551513673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,fp8,0,2.2542816162109376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,float16,0,1.919715118408203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,fp8,0,3.239788818359375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,fp8,0,1.648931121826172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,1,128,1,fp8,fp8,0,1.766321563720703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,float16,0,1.890894317626953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,fp8,0,1.637607955932617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,40,128,1,fp8,fp8,0,1.882271957397461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,2,128,1,fp8,fp8,0,1.635055923461914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,fp8,0,1.6414384841918945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,float16,0,1.9979808807373047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,4,128,1,fp8,fp8,0,1.6365728378295898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,float16,0,1.941320037841797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,float16,0,1.1649616241455079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,fp8,0,1.64477596282959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,40,8,128,1,fp8,fp8,0,1.6930143356323242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,40,128,1,fp8,fp8,0,0.9558896064758301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,fp8,0,1.069480037689209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,float16,0,0.9866623878479004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,fp8,0,0.8390895843505859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,1,128,1,fp8,fp8,0,0.8390975952148437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,float16,0,1.0243200302124023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,fp8,0,0.8931504249572754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,float16,0,0.9723183631896972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,fp8,0,0.8793791770935059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,4,128,1,fp8,fp8,0,0.8615951538085938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,fp8,0,0.8567407608032227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,float16,0,0.994660758972168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,8,128,1,fp8,fp8,0,0.8563599586486816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,40,128,1,fp8,fp8,0,0.49892477989196776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,float16,0,0.6051040172576905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,fp8,0,0.43941440582275393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,float16,0,0.5086544036865235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,1,128,1,fp8,fp8,0,0.4405935764312744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,40,2,128,1,fp8,fp8,0,0.8392512321472168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,float16,0,0.5101791858673096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,fp8,0,0.4409440040588379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,2,128,1,fp8,fp8,0,0.44062080383300783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,float16,0,0.5197855949401855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,fp8,0,0.4440432071685791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,4,128,1,fp8,fp8,0,0.44121761322021485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,float16,0,0.5396128177642823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,fp8,0,0.44208478927612305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,8,128,1,fp8,fp8,0,0.4424448013305664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,float16,0,0.34208800792694094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,fp8,0,0.2726032018661499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,40,128,1,fp8,fp8,0,0.29502239227294924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,float16,0,0.27155361175537107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,fp8,0,0.26214399337768557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,fp8,0,0.5434319972991943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,1,128,1,fp8,fp8,0,0.23992960453033446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,float16,0,0.2787440061569214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,fp8,0,0.23997120857238768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,2,128,1,fp8,fp8,0,0.2404047966003418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,fp8,0,0.24019041061401367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,4,128,1,fp8,fp8,0,0.24094560146331787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,float16,0,0.2767312049865723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,fp8,0,0.24178080558776854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,8,128,1,fp8,fp8,0,0.24191679954528808
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,float16,0,0.2792079925537109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,fp8,0,3.2007102966308594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,float16,0,3.7084304809570314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,1,128,1,fp8,fp8,0,3.2027103424072267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,fp8,0,3.2018177032470705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,2,128,1,fp8,fp8,0,3.282360076904297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,fp8,0,3.203644943237305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,float16,0,3.863828659057617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,4,128,1,fp8,fp8,0,3.2114639282226562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,float16,0,3.6051681518554686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,float16,0,2.305232048034668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,8,128,1,fp8,fp8,0,3.2094047546386717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,fp8,0,3.272417449951172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,float16,0,3.7655055999755858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,fp8,0,2.013599967956543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,fp8,0,1.6408559799194335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,float16,0,1.8445039749145509
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,40,128,1,fp8,fp8,0,1.9152799606323243
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,1,128,1,fp8,fp8,0,1.6133295059204102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,float16,0,1.8189023971557616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,fp8,0,1.8616016387939454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,float16,0,1.823899269104004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,fp8,0,1.7392208099365234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,4,128,1,fp8,fp8,0,1.620172882080078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,float16,0,1.9385904312133788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,fp8,0,1.6196640014648438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,8,128,1,fp8,fp8,0,1.6234304428100585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,40,2,128,1,fp8,fp8,0,1.6163135528564454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,fp8,0,0.9761152267456055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,float16,0,0.9139311790466309
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,float16,0,1.2392399787902832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,40,128,1,fp8,fp8,0,0.9751744270324707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,fp8,0,0.8226160049438477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,1,128,1,fp8,fp8,0,0.8247808456420899
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,fp8,0,0.8234496116638184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,float16,0,0.974550437927246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,2,128,1,fp8,fp8,0,0.8235024452209473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,fp8,0,0.8249855995178222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,float16,0,0.9318016052246094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,4,128,1,fp8,fp8,0,0.8605392456054688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,float16,0,0.6066880226135254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,fp8,0,0.8274944305419922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,float16,0,0.9527183532714844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,40,8,128,1,fp8,fp8,0,0.8888416290283203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,fp8,0,0.5080959796905518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,40,128,1,fp8,fp8,0,0.5073408126831055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,float16,0,0.47410240173339846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,fp8,0,0.46065759658813477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,1,128,1,fp8,fp8,0,0.42777280807495116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,fp8,0,0.42971038818359375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,2,128,1,fp8,fp8,0,0.42815361022949217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,float16,0,0.4770959854125977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,fp8,0,0.4284832000732422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,4,128,1,fp8,fp8,0,0.42864480018615725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,fp8,0,0.4296864032745361
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,8,128,1,fp8,fp8,0,0.4295792102813721
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,fp8,0,0.2701535940170288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,40,128,1,fp8,fp8,0,0.26951839923858645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,float16,0,0.2536112070083618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,fp8,0,0.2295248031616211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,1,128,1,fp8,fp8,0,0.2296799898147583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,float16,0,0.25416638851165774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,fp8,0,0.22965919971466064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,float16,0,0.48604798316955566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,2,128,1,fp8,fp8,0,0.22950880527496337
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,float16,0,0.2607935905456543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,fp8,0,0.22966558933258058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,4,128,1,fp8,fp8,0,0.2302272081375122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,float16,0,0.2626960039138794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,fp8,0,0.23154881000518798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,float16,0,0.49244160652160646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,8,128,1,fp8,fp8,0,0.23112161159515382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,fp8,0,0.1506592035293579
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,float16,0,0.32051360607147217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,40,128,1,fp8,fp8,0,0.15152640342712403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,float16,0,0.1398512005805969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,fp8,0,0.12939200401306153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,1,128,1,fp8,fp8,0,0.12942559719085694
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,float16,0,0.13969119787216186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,fp8,0,0.12929760217666625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,2,128,1,fp8,fp8,0,0.1293552041053772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,float16,0,0.1422767996788025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,fp8,0,0.1292736053466797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,4,128,1,fp8,fp8,0,0.12936960458755492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,float16,0,0.14577440023422242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,fp8,0,0.12951200008392333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,8,128,1,fp8,fp8,0,0.12916159629821777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,fp8,0,1.9892927169799806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,float16,0,2.152884864807129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,float16,0,0.18006240129470824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,1,128,1,fp8,fp8,0,1.988871955871582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,float16,0,2.1830896377563476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,fp8,0,1.9958511352539063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,2,128,1,fp8,fp8,0,1.9904912948608398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,fp8,0,1.9971551895141602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,float16,0,2.2947263717651367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,4,128,1,fp8,fp8,0,1.993934440612793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,float16,0,2.2734703063964843
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,fp8,0,2.0022207260131837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,40,8,128,1,fp8,fp8,0,2.1017295837402346
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,float16,0,1.4562224388122558
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,40,128,1,fp8,fp8,0,1.235713577270508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,fp8,0,1.3127632141113281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,float16,0,1.0857952117919922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,fp8,0,1.0115391731262207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,1,128,1,fp8,fp8,0,1.009828758239746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,float16,0,1.088803195953369
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,2,128,1,fp8,fp8,0,1.0094032287597656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,fp8,0,1.0164655685424804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,float16,0,1.1336624145507812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,4,128,1,fp8,fp8,0,1.0107808113098145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,fp8,0,1.0950176239013671
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,float16,0,1.1476127624511718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,8,128,1,fp8,fp8,0,1.0137392044067384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,fp8,0,0.6306879997253418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,float16,0,0.7561615943908692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,fp8,0,1.0573023796081542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,float16,0,0.5566207885742187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,40,128,1,fp8,fp8,0,0.631766414642334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,1,128,1,fp8,fp8,0,0.517732810974121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,float16,0,0.5618000030517578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,fp8,0,0.5184879779815674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,2,128,1,fp8,fp8,0,0.5181407928466797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,float16,0,0.5716991901397706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,fp8,0,0.5191808223724366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,4,128,1,fp8,fp8,0,0.5185535907745361
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,fp8,0,0.5206399917602539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,8,128,1,fp8,fp8,0,0.5202144145965576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,float16,0,0.38737919330596926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,fp8,0,0.3300895929336548
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,40,128,1,fp8,fp8,0,0.3305311918258667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,float16,0,0.2919343948364258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,fp8,0,0.27276480197906494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,1,128,1,fp8,fp8,0,0.27244958877563474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,float16,0,0.29512801170349123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,fp8,0,0.272710394859314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,2,128,1,fp8,fp8,0,0.2725840091705322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,float16,0,0.2978096008300781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,fp8,0,0.27244958877563474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,4,128,1,fp8,fp8,0,0.2726639986038208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,float16,0,0.3098367929458618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,fp8,0,0.2741919994354248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,40,8,128,1,fp8,fp8,0,0.2730288028717041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,float16,0,0.20924320220947265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,float16,0,0.5925600051879882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,40,128,1,fp8,fp8,0,0.1799232006072998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,float16,0,0.16003680229187012
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,fp8,0,0.14810240268707275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,1,128,1,fp8,fp8,0,0.14778399467468262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,float16,0,0.16132320165634156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,fp8,0,0.14788320064544677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,2,128,1,fp8,fp8,0,0.14937119483947753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,float16,0,0.1621791958808899
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,fp8,0,0.14947199821472168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,4,128,1,fp8,fp8,0,0.14830080270767212
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,float16,0,0.16816480159759523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,fp8,0,0.1496559977531433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,fp8,0,0.5196447849273682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,8,128,1,fp8,fp8,0,0.14971519708633424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,float16,0,0.11916799545288086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,fp8,0,0.10258079767227173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,40,128,1,fp8,fp8,0,0.10169919729232788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,float16,0,0.09251360297203064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,fp8,0,0.08796479701995849
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,1,128,1,fp8,fp8,0,0.08668479919433594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,float16,0,0.09244959950447082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,fp8,0,0.08768640160560608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,2,128,1,fp8,fp8,0,0.08659679889678955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,float16,0,0.09324319958686829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,fp8,0,0.08772320151329041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,4,128,1,fp8,fp8,0,0.08651679754257202
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,float16,0,0.0947488009929657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,fp8,0,0.08732159733772278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,40,8,128,1,fp8,fp8,0,0.08658400177955627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,fp8,0,0.1786736011505127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,fp8,0,2.0893280029296877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,float16,0,2.1676303863525392
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,1,128,1,fp8,fp8,0,2.08603515625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,float16,0,2.1894927978515626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,fp8,0,2.0909744262695313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,2,128,1,fp8,fp8,0,2.0895919799804688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,float16,0,2.2491632461547852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,fp8,0,2.13525447845459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,4,128,1,fp8,fp8,0,2.0924848556518554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,float16,0,2.3381088256835936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,fp8,0,2.1260095596313477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,40,8,128,1,fp8,fp8,0,2.095793533325195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,fp8,0,1.347276782989502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,float16,0,1.6106992721557618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,40,128,1,fp8,fp8,0,1.3483391761779786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,float16,0,1.0935392379760742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,fp8,0,1.1296064376831054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,1,128,1,fp8,fp8,0,1.0541664123535157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,float16,0,1.0986144065856933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,fp8,0,1.0690799713134767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,2,128,1,fp8,fp8,0,1.054980754852295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,float16,0,1.1339280128479003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,fp8,0,1.05557279586792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,4,128,1,fp8,fp8,0,1.0561408042907714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,float16,0,1.1852848052978515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,fp8,0,1.0588640213012694
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,float16,0,0.7975056171417236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,40,8,128,1,fp8,fp8,0,1.0589424133300782
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,fp8,0,0.7241360187530518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,40,128,1,fp8,fp8,0,0.6849552154541015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,float16,0,0.569379186630249
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,1,128,1,fp8,fp8,0,0.5388288021087646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,fp8,0,0.5390975952148438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,2,128,1,fp8,fp8,0,0.5420688152313232
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,float16,0,0.5876783847808837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,fp8,0,0.5389984130859375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,4,128,1,fp8,fp8,0,0.5397151947021485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,float16,0,0.6009456157684326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,fp8,0,0.5414415836334229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,8,128,1,fp8,fp8,0,0.5408239841461182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,float16,0,0.410643196105957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,fp8,0,0.354801607131958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,fp8,0,0.5378352165222168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,40,128,1,fp8,fp8,0,0.35583200454711916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,float16,0,0.29178240299224856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,float16,0,0.5606400012969971
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,fp8,0,0.279256010055542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,1,128,1,fp8,fp8,0,0.2798640012741089
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,float16,0,0.29317119121551516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,fp8,0,0.2790976047515869
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,2,128,1,fp8,fp8,0,0.2793359994888306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,float16,0,0.30371201038360596
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,fp8,0,0.2804464101791382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,4,128,1,fp8,fp8,0,0.27965760231018066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,float16,0,0.3133824110031128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,fp8,0,0.2817039966583252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,float16,0,0.21627519130706788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,fp8,0,0.18883039951324462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,float16,0,0.15760799646377563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,fp8,0,0.1500048041343689
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,1,128,1,fp8,fp8,0,0.15003520250320435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,float16,0,0.15881919860839844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,fp8,0,0.15016800165176392
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,2,128,1,fp8,fp8,0,0.15023679733276368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,float16,0,0.16060639619827272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,fp8,0,0.1511247992515564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,4,128,1,fp8,fp8,0,0.15018399953842163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,float16,0,0.16819679737091064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,fp8,0,0.15048480033874512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,8,128,1,fp8,fp8,0,0.15148639678955078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,float16,0,0.12096480131149293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,fp8,0,0.10517280101776123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,40,128,1,fp8,fp8,0,0.10521279573440552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,float16,0,0.0886687994003296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,fp8,0,0.08467040061950684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,1,128,1,fp8,fp8,0,0.08412160277366638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,float16,0,0.088919997215271
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,fp8,0,0.08446879982948304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,2,128,1,fp8,fp8,0,0.0846127986907959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,float16,0,0.09034559726715088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,fp8,0,0.08480160236358643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,4,128,1,fp8,fp8,0,0.08489599823951721
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,float16,0,0.09444800019264221
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,fp8,0,0.08452159762382508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,40,8,128,1,fp8,fp8,0,0.08553599715232849
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,float16,0,0.0707423985004425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,fp8,0,0.061084800958633424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,40,128,1,fp8,fp8,0,0.061105602979660036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,float16,0,0.05560799837112427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,40,8,128,1,fp8,fp8,0,0.28127360343933105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,1,128,1,fp8,fp8,0,0.05252479910850525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,float16,0,0.055615997314453124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,fp8,0,0.05276319980621338
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,40,40,128,1,fp8,fp8,0,0.18923840522766114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,2,128,1,fp8,fp8,0,0.05183839797973633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,float16,0,0.0557856023311615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,fp8,0,0.052934402227401735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,4,128,1,fp8,fp8,0,0.05270079970359802
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,float16,0,0.057524800300598145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,fp8,0,0.05343520045280457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,8,128,1,fp8,fp8,0,0.05296159982681274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,float16,0,1.3798239707946778
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,fp8,0,1.3608880043029785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,1,128,1,fp8,fp8,0,1.3580896377563476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,float16,0,1.417795181274414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,fp8,0,1.3614656448364257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,fp8,0,0.052180802822113036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,2,128,1,fp8,fp8,0,1.3584959983825684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,float16,0,1.4296735763549804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,4,128,1,fp8,fp8,0,1.3605104446411134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,float16,0,1.5008111953735352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,fp8,0,1.3646320343017577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,8,128,1,fp8,fp8,0,1.3642255783081054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,float16,0,1.0330944061279297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,fp8,0,0.9065024375915527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,fp8,0,1.3627872467041016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,float16,0,0.7011983871459961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,40,128,1,fp8,fp8,0,0.9074992179870606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,1,128,1,fp8,fp8,0,0.6885647773742676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,float16,0,0.7074351787567139
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,fp8,0,0.6888000011444092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,2,128,1,fp8,fp8,0,0.6888271808624268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,float16,0,0.7247935771942139
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,fp8,0,0.6899295806884765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,4,128,1,fp8,fp8,0,0.6896687984466553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,float16,0,0.7604703903198242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,8,128,1,fp8,fp8,0,0.6917647838592529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,fp8,0,0.6892047882080078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,fp8,0,0.46379680633544923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,40,128,1,fp8,fp8,0,0.4647952079772949
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,float16,0,0.3622704029083252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,fp8,0,0.3545007944107056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,1,128,1,fp8,fp8,0,0.35430240631103516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,float16,0,0.3675519943237305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,fp8,0,0.3542864084243774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,2,128,1,fp8,fp8,0,0.35495359897613527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,float16,0,0.3740976095199585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,fp8,0,0.35506880283355713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,fp8,0,0.6927487850189209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,4,128,1,fp8,fp8,0,0.3546367883682251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,float16,0,0.39567201137542723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,fp8,0,0.3557456016540527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,8,128,1,fp8,fp8,0,0.35642080307006835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,float16,0,0.5262095928192139
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,float16,0,0.2767024040222168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,fp8,0,0.24263520240783693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,40,128,1,fp8,fp8,0,0.24231839179992676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,float16,0,0.19113279581069947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,1,128,1,fp8,fp8,0,0.18646559715270997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,float16,0,0.19247039556503295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,fp8,0,0.18632479906082153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,2,128,1,fp8,fp8,0,0.1862511992454529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,float16,0,0.19685120582580568
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,fp8,0,0.18661760091781615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,4,128,1,fp8,fp8,0,0.18660320043563844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,float16,0,0.20645759105682374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,fp8,0,0.18712639808654785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,8,128,1,fp8,fp8,0,0.18678079843521117
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,fp8,0,0.13129440546035767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,40,128,1,fp8,fp8,0,0.13156640529632568
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,float16,0,0.10663679838180543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,fp8,0,0.1010543942451477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,1,128,1,fp8,fp8,0,0.10090080499649048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,float16,0,0.10676480531692505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,fp8,0,0.10121279954910278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,2,128,1,fp8,fp8,0,0.10167200565338134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,float16,0,0.10863679647445679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,fp8,0,0.10195519924163818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,4,128,1,fp8,fp8,0,0.10166079998016357
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,float16,0,0.11307519674301147
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,fp8,0,0.10277279615402221
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,8,128,1,fp8,fp8,0,0.10289599895477294
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,float16,0,0.08416640162467956
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,fp8,0,0.0736735999584198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,40,128,1,fp8,fp8,0,0.07380160093307495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,float16,0,0.06181120276451111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,fp8,0,0.059575998783111574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,1,128,1,fp8,fp8,0,0.05939840078353882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,float16,0,0.061699199676513675
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,fp8,0,0.05900319814682007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,2,128,1,fp8,fp8,0,0.05964159965515137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,float16,0,0.06197919845581055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,fp8,0,0.059595197439193726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,4,128,1,fp8,fp8,0,0.0595088005065918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,fp8,0,0.1870255947113037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,float16,0,0.06397119760513306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,fp8,0,0.059540802240371705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,40,8,128,1,fp8,fp8,0,0.059678399562835695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,float16,0,0.04825280010700226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,40,128,1,fp8,fp8,0,0.045747199654579164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,float16,0,0.041223999857902524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,fp8,0,0.03935199975967407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,1,128,1,fp8,fp8,0,0.03915840089321136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,float16,0,0.04170880019664765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,fp8,0,0.03916159868240356
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,2,128,1,fp8,fp8,0,0.039422398805618285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,float16,0,0.04133279919624329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,float16,0,0.1477504014968872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,4,128,1,fp8,fp8,0,0.039345601201057435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,float16,0,0.04314239919185638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,fp8,0,0.039155200123786926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,8,128,1,fp8,fp8,0,0.03943839967250824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,float16,0,1.5128767967224122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,fp8,0,0.04571360051631927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,fp8,0,0.03936960101127625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,1,128,1,fp8,fp8,0,1.5271023750305175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,float16,0,1.5230480194091798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,fp8,0,1.5229056358337403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,2,128,1,fp8,fp8,0,1.521072006225586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,fp8,0,1.521463966369629
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,float16,0,1.570967960357666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,fp8,0,1.5246272087097168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,4,128,1,fp8,fp8,0,1.5220656394958496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,float16,0,1.6680816650390624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,fp8,0,1.5300543785095215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,40,8,128,1,fp8,fp8,0,1.5274703979492188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,float16,0,1.194865608215332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,fp8,0,1.0569007873535157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,40,128,1,fp8,fp8,0,1.0535375595092773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,float16,0,0.7683519840240478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,fp8,0,0.770417594909668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,1,128,1,fp8,fp8,0,0.7694911956787109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,float16,0,0.7719488143920898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,fp8,0,0.7703360080718994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,2,128,1,fp8,fp8,0,0.7686304092407227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,float16,0,0.7934432029724121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,fp8,0,0.7705808162689209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,4,128,1,fp8,fp8,0,0.7717360019683838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,float16,0,0.8419119834899902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,fp8,0,0.7726096153259278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,float16,0,0.6033279895782471
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,fp8,0,0.5359744071960449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,40,128,1,fp8,fp8,0,0.5353519916534424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,float16,0,0.3949343919754028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,fp8,0,0.39235360622406007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,1,128,1,fp8,fp8,0,0.3928639888763428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,float16,0,0.39718239307403563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,fp8,0,0.39221758842468263
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,2,128,1,fp8,fp8,0,0.39279038906097413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,float16,0,0.40849919319152833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,fp8,0,0.39344000816345215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,4,128,1,fp8,fp8,0,0.3936784029006958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,float16,0,0.4325232028961182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,fp8,0,0.3947871923446655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,40,8,128,1,fp8,fp8,0,0.3951359987258911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,float16,0,0.31085920333862305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,fp8,0,0.2766736030578613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,40,128,1,fp8,fp8,0,0.2765712022781372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,float16,0,0.20723040103912355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,fp8,0,0.20435678958892822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,1,128,1,fp8,fp8,0,0.2042367935180664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,40,8,128,1,fp8,fp8,0,0.7734399795532226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,fp8,0,0.20401279926300048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,2,128,1,fp8,fp8,0,0.204201602935791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,float16,0,0.21304640769958497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,fp8,0,0.20496160984039308
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,4,128,1,fp8,fp8,0,0.20536320209503173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,float16,0,0.2239840030670166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,fp8,0,0.2054527997970581
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,8,128,1,fp8,fp8,0,0.2064176082611084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,float16,0,0.1639840006828308
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,fp8,0,0.14651039838790894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,40,128,1,fp8,fp8,0,0.14758880138397218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,float16,0,0.11227519512176513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,fp8,0,0.1098479986190796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,1,128,1,fp8,fp8,0,0.10999519824981689
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,float16,0,0.11261440515518188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,fp8,0,0.10967520475387574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,2,128,1,fp8,fp8,0,0.11028640270233155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,float16,0,0.11518559455871583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,fp8,0,0.1099392056465149
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,4,128,1,fp8,fp8,0,0.11054879426956177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,float16,0,0.12138880491256714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,fp8,0,0.11064800024032592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,40,8,128,1,fp8,fp8,0,0.11088160276412964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,float16,0,0.09242879748344421
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,fp8,0,0.08120800256729126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,float16,0,0.06380320191383362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,fp8,0,0.06176959872245789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,1,128,1,fp8,fp8,0,0.06201279759407043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,float16,0,0.06353440284729003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,fp8,0,0.06179839968681335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,2,128,1,fp8,fp8,0,0.061673599481582644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,float16,0,0.06520479917526245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,fp8,0,0.06160640120506287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,4,128,1,fp8,fp8,0,0.06159359812736511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,float16,0,0.06845759749412536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,fp8,0,0.06194400191307068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,8,128,1,fp8,fp8,0,0.06223840117454529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,fp8,0,0.04727199971675873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,float16,0,0.20764639377593994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,40,128,1,fp8,fp8,0,0.04726240038871765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,float16,0,0.04091840088367462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,fp8,0,0.03859519958496094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,1,128,1,fp8,fp8,0,0.039094400405883786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,float16,0,0.03948639929294586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,fp8,0,0.039084801077842714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,2,128,1,fp8,fp8,0,0.0382095992565155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,float16,0,0.0410975992679596
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,fp8,0,0.03832319974899292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,4,128,1,fp8,fp8,0,0.03901920020580292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,float16,0,0.04122560024261475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,fp8,0,0.03907679915428162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,8,128,1,fp8,fp8,0,0.0389488011598587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,float16,0,0.032969599962234496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,fp8,0,0.03295679986476898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,40,128,1,fp8,fp8,0,0.032979199290275575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,float16,0,0.030185601115226744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,1,128,1,fp8,fp8,0,0.028804799914360045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,40,40,128,1,fp8,fp8,0,0.08174239993095397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,float16,0,0.029275199770927428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,fp8,0,0.028815999627113342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,2,128,1,fp8,fp8,0,0.028803199529647827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,float16,0,0.030868801474571227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,fp8,0,0.02898080050945282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,4,128,1,fp8,fp8,0,0.028798401355743408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,float16,0,0.05336959958076477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,float16,0,0.030961599946022034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,fp8,0,0.028951999545097352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,8,128,1,fp8,fp8,0,0.028832000494003297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,1,128,1,float16,float16,0,1.1963472366333008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,fp8,0,0.02886880040168762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,1,128,1,fp8,fp8,0,1.2322784423828126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,2,128,1,float16,float16,0,1.2033087730407714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,2,128,1,float16,fp8,0,1.2307680130004883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,2,128,1,fp8,fp8,0,1.231503963470459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,4,128,1,float16,float16,0,1.2421584129333496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,1,128,1,float16,fp8,0,1.2307711601257325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,4,128,1,float16,fp8,0,1.2314144134521485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,4,128,1,fp8,fp8,0,1.2321855545043945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,8,128,1,float16,float16,0,1.3232912063598632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,40,128,1,float16,float16,0,0.9952927589416504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,8,128,1,fp8,fp8,0,1.2331151962280273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,40,128,1,float16,fp8,0,0.8968655586242675
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,1,128,1,float16,float16,0,0.6067279815673828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,40,128,1,fp8,fp8,0,0.8980143547058106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,1,128,1,float16,fp8,0,0.6225152015686035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,1,128,1,fp8,fp8,0,0.6230512142181397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,2,128,1,float16,float16,0,0.6102543830871582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,2,128,1,float16,fp8,0,0.6220160007476807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,2,128,1,fp8,fp8,0,0.6230336189270019
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,4,128,1,float16,float16,0,0.6314591884613037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,4,128,1,float16,fp8,0,0.6218575954437255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,4,128,1,fp8,fp8,0,0.6227983951568603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,40,8,128,1,float16,fp8,0,1.2313407897949218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,8,128,1,float16,float16,0,0.6707568168640137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,8,128,1,float16,fp8,0,0.6230832099914551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,40,128,1,float16,float16,0,0.5053247928619384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,40,8,128,1,fp8,fp8,0,0.6236032009124756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,40,128,1,float16,fp8,0,0.45751199722290037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,1,128,1,float16,float16,0,0.3114367961883545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,1,128,1,float16,fp8,0,0.31807680130004884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,1,128,1,fp8,fp8,0,0.3195215940475464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,2,128,1,float16,float16,0,0.3121392011642456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,2,128,1,float16,fp8,0,0.31790239810943605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,2,128,1,fp8,fp8,0,0.31885600090026855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,4,128,1,float16,float16,0,0.3220655918121338
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,4,128,1,float16,fp8,0,0.3186079978942871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,4,128,1,fp8,fp8,0,0.31966400146484375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,8,128,1,float16,float16,0,0.34227681159973145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,8,128,1,fp8,fp8,0,0.3195375919342041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,40,128,1,float16,float16,0,0.26094560623168944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,40,128,1,float16,fp8,0,0.23581280708312988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,40,128,1,fp8,fp8,0,0.23552958965301513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,1,128,1,float16,fp8,0,0.16613119840621948
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,1,128,1,fp8,fp8,0,0.16604000329971313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,2,128,1,float16,float16,0,0.1639664053916931
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,2,128,1,float16,fp8,0,0.16568959951400758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,2,128,1,fp8,fp8,0,0.16592639684677124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,40,128,1,fp8,fp8,0,0.4552720069885254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,4,128,1,float16,float16,0,0.16910560131073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,4,128,1,float16,fp8,0,0.16618239879608154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,4,128,1,fp8,fp8,0,0.1662384033203125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,8,128,1,float16,float16,0,0.17849600315093994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,8,128,1,float16,fp8,0,0.16609439849853516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,8,128,1,fp8,fp8,0,0.16689759492874146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,40,128,1,float16,float16,0,0.1383247971534729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,40,128,1,float16,fp8,0,0.12519359588623047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,40,128,1,fp8,fp8,0,0.12523839473724366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,40,8,128,1,float16,fp8,0,0.3194159984588623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,1,128,1,float16,float16,0,0.08964160084724426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,1,128,1,float16,fp8,0,0.09018880128860474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,1,128,1,fp8,fp8,0,0.09008319973945618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,2,128,1,float16,float16,0,0.09022719860076904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,2,128,1,float16,fp8,0,0.09021440148353577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,2,128,1,fp8,fp8,0,0.08970720171928406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,4,128,1,float16,float16,0,0.0921280026435852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,4,128,1,float16,fp8,0,0.09048799872398376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,4,128,1,fp8,fp8,0,0.09017919898033142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,8,128,1,float16,float16,0,0.09780799746513366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,8,128,1,float16,fp8,0,0.09028639793395996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,40,1,128,1,float16,float16,0,0.164027202129364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,40,128,1,float16,float16,0,0.07633919715881347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,40,128,1,float16,fp8,0,0.06968799829483033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,40,128,1,fp8,fp8,0,0.06990399956703186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,1,128,1,float16,float16,0,0.050486397743225095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,1,128,1,float16,fp8,0,0.050830399990081786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,1,128,1,fp8,fp8,0,0.05104159712791443
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,2,128,1,float16,float16,0,0.051236802339553834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,2,128,1,float16,fp8,0,0.050435197353363034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,2,128,1,fp8,fp8,0,0.05104320049285889
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,4,128,1,float16,float16,0,0.05244320034980774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,4,128,1,float16,fp8,0,0.050449597835540774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,8,128,1,float16,float16,0,0.05580639839172363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,8,128,1,float16,fp8,0,0.05135520100593567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,8,128,1,fp8,fp8,0,0.05125439763069153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,40,128,1,float16,float16,0,0.04531840085983276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,40,128,1,float16,fp8,0,0.041075199842453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,40,128,1,fp8,fp8,0,0.04112800061702728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,40,8,128,1,fp8,fp8,0,0.09024159908294678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,1,128,1,float16,fp8,0,0.03287039995193482
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,1,128,1,fp8,fp8,0,0.03254559934139252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,2,128,1,float16,float16,0,0.03295679986476898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,40,4,128,1,fp8,fp8,0,0.05104159712791443
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,2,128,1,float16,fp8,0,0.03292160034179688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,2,128,1,fp8,fp8,0,0.03283360004425049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,4,128,1,float16,float16,0,0.03291999995708465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,4,128,1,float16,fp8,0,0.03295679986476898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,4,128,1,fp8,fp8,0,0.032304000854492185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,1,128,1,float16,float16,0,0.03293119966983795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,8,128,1,float16,fp8,0,0.031892800331115724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,40,128,1,float16,float16,0,0.02848159968852997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,40,128,1,float16,fp8,0,0.02885119915008545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,40,128,1,fp8,fp8,0,0.02885119915008545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,1,128,1,float16,float16,0,0.02473759949207306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,1,128,1,float16,fp8,0,0.0247856006026268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,1,128,1,fp8,fp8,0,0.024753600358963013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,2,128,1,float16,float16,0,0.02476480007171631
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,2,128,1,float16,fp8,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,2,128,1,fp8,fp8,0,0.024769599735736846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,4,128,1,float16,float16,0,0.024897600710391998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,4,128,1,float16,fp8,0,0.023771199584007262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,4,128,1,fp8,fp8,0,0.02404800057411194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,8,128,1,float16,float16,0,0.024774399399757386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,8,128,1,float16,fp8,0,0.02406720072031021
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,40,8,128,1,fp8,fp8,0,0.02458080053329468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,40,128,1,float16,float16,0,0.018632000684738158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,40,128,1,float16,fp8,0,0.018716800212860107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,40,128,1,fp8,fp8,0,0.018622399866580965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,1,128,1,float16,float16,0,0.017608000338077544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,1,128,1,float16,fp8,0,0.016568000614643096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,1,128,1,fp8,fp8,0,0.01664000004529953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,2,128,1,float16,float16,0,0.017473599314689635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,2,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,2,128,1,fp8,fp8,0,0.01656319946050644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,8,128,1,float16,float16,0,0.035078400373458864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,4,128,1,float16,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,4,128,1,fp8,fp8,0,0.016655999422073364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,8,128,1,float16,fp8,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,8,128,1,fp8,fp8,0,0.016672000288963318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,1,128,1,float16,float16,0,0.5147168159484863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,1,128,1,float16,fp8,0,0.5386879920959473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,1,128,1,fp8,fp8,0,0.5388879776000977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,2,128,1,float16,float16,0,0.5174992084503174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,4,128,1,float16,float16,0,0.018748800456523895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,40,8,128,1,float16,float16,0,0.01801919937133789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,2,128,1,float16,fp8,0,0.5387968063354492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,2,128,1,fp8,fp8,0,0.5372943878173828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,4,128,1,float16,float16,0,0.5347296237945557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,4,128,1,float16,fp8,0,0.5381968021392822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,4,128,1,fp8,fp8,0,0.537278413772583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,8,128,1,float16,float16,0,0.5732672214508057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,8,128,1,float16,fp8,0,0.5383423805236817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,40,128,1,float16,float16,0,0.446457576751709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,40,128,1,float16,fp8,0,0.4082047939300537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,40,8,128,1,fp8,fp8,0,0.5373072147369384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,1,128,1,float16,float16,0,0.2641855955123901
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,40,128,1,fp8,fp8,0,0.40752158164978025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,1,128,1,float16,fp8,0,0.27469120025634763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,1,128,1,fp8,fp8,0,0.27476160526275634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,2,128,1,float16,float16,0,0.26466879844665525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,2,128,1,float16,fp8,0,0.2744112014770508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,2,128,1,fp8,fp8,0,0.2746527910232544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,4,128,1,float16,float16,0,0.27369279861450196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,4,128,1,float16,fp8,0,0.2738591909408569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,40,8,128,1,fp8,fp8,0,0.03299359977245331
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,4,128,1,fp8,fp8,0,0.27444159984588623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,8,128,1,float16,float16,0,0.2924367904663086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,8,128,1,float16,fp8,0,0.27476160526275634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,40,8,128,1,fp8,fp8,0,0.2747632026672363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,40,128,1,float16,fp8,0,0.20902080535888673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,40,128,1,fp8,fp8,0,0.20918240547180175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,1,128,1,float16,float16,0,0.13752959966659545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,1,128,1,float16,fp8,0,0.1415824055671692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,2,128,1,float16,float16,0,0.1374735951423645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,1,128,1,fp8,fp8,0,0.14183679819107056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,2,128,1,float16,fp8,0,0.1418463945388794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,2,128,1,fp8,fp8,0,0.1415824055671692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,4,128,1,float16,float16,0,0.142086398601532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,4,128,1,float16,fp8,0,0.14164479970932006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,4,128,1,fp8,fp8,0,0.14158400297164916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,8,128,1,float16,float16,0,0.1518847942352295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,8,128,1,fp8,fp8,0,0.14164960384368896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,40,128,1,float16,float16,0,0.12332639694213868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,40,128,1,float16,fp8,0,0.1125216007232666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,40,128,1,fp8,fp8,0,0.1128991961479187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,1,128,1,float16,float16,0,0.07731840014457703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,1,128,1,float16,fp8,0,0.078056001663208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,1,128,1,fp8,fp8,0,0.07792320251464843
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,2,128,1,float16,float16,0,0.0781328022480011
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,2,128,1,float16,fp8,0,0.07807520031929016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,2,128,1,fp8,fp8,0,0.07840160131454468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,4,128,1,float16,fp8,0,0.07808480262756348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,4,128,1,fp8,fp8,0,0.07817280292510986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,8,128,1,float16,float16,0,0.08423839807510376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,8,128,1,float16,fp8,0,0.07840800285339355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,8,128,1,fp8,fp8,0,0.07816159725189209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,40,128,1,float16,float16,0,0.0704479992389679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,40,128,1,float16,float16,0,0.2292112112045288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,40,128,1,float16,fp8,0,0.06424480080604553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,40,128,1,fp8,fp8,0,0.06418079733848572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,1,128,1,float16,float16,0,0.04534879922866821
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,1,128,1,float16,fp8,0,0.04531840085983276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,1,128,1,fp8,fp8,0,0.045278400182724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,2,128,1,float16,float16,0,0.04532960057258606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,2,128,1,float16,fp8,0,0.045296001434326175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,2,128,1,fp8,fp8,0,0.04530879855155945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,4,128,1,float16,float16,0,0.04713599979877472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,4,128,1,float16,fp8,0,0.045372799038887024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,4,128,1,fp8,fp8,0,0.04547359943389893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,8,128,1,float16,float16,0,0.049718400835990904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,8,128,1,float16,fp8,0,0.04538240134716034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,40,4,128,1,float16,float16,0,0.08001919984817504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,40,8,128,1,fp8,fp8,0,0.04583519995212555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,40,128,1,float16,float16,0,0.042331200838088986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,40,128,1,fp8,fp8,0,0.03715519905090332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,1,128,1,float16,float16,0,0.030177599191665648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,1,128,1,float16,fp8,0,0.028859201073646545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,1,128,1,fp8,fp8,0,0.028841599822044373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,2,128,1,float16,float16,0,0.029291200637817382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,2,128,1,float16,fp8,0,0.02900800108909607
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,2,128,1,fp8,fp8,0,0.02903839945793152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,4,128,1,float16,float16,0,0.03092319965362549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,4,128,1,float16,fp8,0,0.028886398673057555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,4,128,1,fp8,fp8,0,0.029148799180984498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,8,128,1,float16,float16,0,0.03097119927406311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,8,128,1,float16,fp8,0,0.028865599632263185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,8,128,1,fp8,fp8,0,0.028944000601768494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,40,128,1,float16,float16,0,0.024835200607776643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,40,128,1,float16,fp8,0,0.024852800369262695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,40,8,128,1,float16,fp8,0,0.14223999977111818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,40,128,1,fp8,fp8,0,0.02479040026664734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,1,128,1,float16,float16,0,0.022720000147819518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,1,128,1,float16,fp8,0,0.02073120027780533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,1,128,1,fp8,fp8,0,0.020747199654579163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,2,128,1,float16,float16,0,0.02271520048379898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,2,128,1,float16,fp8,0,0.02090719938278198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,2,128,1,fp8,fp8,0,0.020745599269866945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,4,128,1,float16,float16,0,0.022724799811840057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,4,128,1,float16,fp8,0,0.020644800364971162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,4,128,1,fp8,fp8,0,0.021347199380397797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,8,128,1,float16,float16,0,0.02277279943227768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,8,128,1,float16,fp8,0,0.021988800168037413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,40,8,128,1,fp8,fp8,0,0.020791999995708466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,40,128,1,float16,float16,0,0.0165583997964859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,40,128,1,float16,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,40,128,1,fp8,fp8,0,0.016551999747753142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,1,128,1,float16,float16,0,0.014603200554847717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,1,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,1,128,1,fp8,fp8,0,0.01459999978542328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,40,40,128,1,float16,fp8,0,0.0377375990152359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,2,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,2,128,1,fp8,fp8,0,0.014555199444293976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,4,128,1,float16,float16,0,0.01478559970855713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,4,128,1,float16,fp8,0,0.014564800262451171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,8,128,1,float16,float16,0,0.016495999693870545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,8,128,1,float16,fp8,0,0.014800000190734863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,8,128,1,fp8,fp8,0,0.014593599736690522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,40,128,1,float16,float16,0,0.01655679941177368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,40,128,1,float16,fp8,0,0.016468800604343414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,40,128,1,fp8,fp8,0,0.014899200201034546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,1,128,1,float16,float16,0,0.0147024005651474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,1,128,1,float16,fp8,0,0.01446239948272705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,1,128,1,fp8,fp8,0,0.01462240070104599
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,2,128,1,float16,float16,0,0.014524799585342408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,2,128,1,float16,fp8,0,0.014523200690746307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,2,128,1,fp8,fp8,0,0.014451199769973755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,4,128,1,float16,float16,0,0.014771200716495514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,4,128,1,float16,fp8,0,0.015195199847221374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,4,128,1,fp8,fp8,0,0.014480000734329224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,8,128,1,float16,float16,0,0.01462559998035431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,8,128,1,float16,fp8,0,0.014564800262451171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,40,8,128,1,fp8,fp8,0,0.014670400321483612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,1,128,1,float16,float16,0,0.30749120712280276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,1,128,1,float16,fp8,0,0.32164480686187746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,1,128,1,fp8,fp8,0,0.32096960544586184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,2,128,1,float16,float16,0,0.3091327905654907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,2,128,1,float16,fp8,0,0.32020959854125974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,2,128,1,fp8,fp8,0,0.32147998809814454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,4,128,1,float16,float16,0,0.3165503978729248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,2,128,1,float16,float16,0,0.014504000544548035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,4,128,1,float16,fp8,0,0.3208719968795776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,40,4,128,1,fp8,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,4,128,1,fp8,fp8,0,0.3198415994644165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,8,128,1,float16,float16,0,0.3341088056564331
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,40,128,1,float16,float16,0,0.2476047992706299
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,40,128,1,float16,fp8,0,0.23149280548095702
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,1,128,1,float16,float16,0,0.15996320247650148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,40,128,1,fp8,fp8,0,0.23164479732513427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,1,128,1,float16,fp8,0,0.16596319675445556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,1,128,1,fp8,fp8,0,0.16490720510482787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,2,128,1,float16,float16,0,0.15997120141983032
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,2,128,1,float16,fp8,0,0.16518080234527588
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,2,128,1,fp8,fp8,0,0.16592639684677124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,4,128,1,float16,float16,0,0.1640768051147461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,4,128,1,float16,fp8,0,0.1653280019760132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,8,128,1,float16,float16,0,0.17241439819335938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,8,128,1,float16,fp8,0,0.16556160449981688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,8,128,1,fp8,fp8,0,0.16487360000610352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,8,128,1,float16,fp8,0,0.32113120555877683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,40,8,128,1,fp8,fp8,0,0.3197648048400879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,40,128,1,float16,fp8,0,0.12111519575119019
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,40,128,1,fp8,fp8,0,0.12097920179367065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,1,128,1,float16,float16,0,0.08432480096817016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,1,128,1,float16,fp8,0,0.086353600025177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,1,128,1,fp8,fp8,0,0.08644000291824341
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,2,128,1,float16,float16,0,0.08500800132751465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,2,128,1,float16,fp8,0,0.08728320002555848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,2,128,1,fp8,fp8,0,0.08660159707069397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,4,128,1,float16,float16,0,0.08802559971809387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,4,128,1,float16,fp8,0,0.0865887999534607
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,4,128,1,fp8,fp8,0,0.08672639727592468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,8,128,1,float16,float16,0,0.0921567976474762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,8,128,1,float16,fp8,0,0.08694400191307068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,8,128,1,fp8,fp8,0,0.08651360273361205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,40,128,1,float16,float16,0,0.07024319767951966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,40,128,1,float16,fp8,0,0.06573759913444518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,40,128,1,fp8,fp8,0,0.06600319743156433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,40,40,128,1,float16,float16,0,0.12859359979629517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,1,128,1,float16,float16,0,0.04731360077857971
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,1,128,1,float16,fp8,0,0.04779840111732483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,1,128,1,fp8,fp8,0,0.04746719896793365
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,2,128,1,float16,float16,0,0.0474016010761261
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,2,128,1,float16,fp8,0,0.047809600830078125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,2,128,1,fp8,fp8,0,0.04765920042991638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,4,128,1,float16,float16,0,0.049326398968696596
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,4,128,1,float16,fp8,0,0.04890719950199127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,8,128,1,float16,float16,0,0.05174559950828552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,8,128,1,float16,fp8,0,0.04921599924564361
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,8,128,1,fp8,fp8,0,0.049377599358558656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,40,4,128,1,fp8,fp8,0,0.1648368000984192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,40,128,1,float16,float16,0,0.04145599901676178
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,40,128,1,float16,fp8,0,0.03715839982032776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,1,128,1,float16,fp8,0,0.029019200801849367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,1,128,1,float16,float16,0,0.029025599360466003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,1,128,1,fp8,fp8,0,0.029116800427436827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,2,128,1,float16,float16,0,0.029032000899314882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,2,128,1,float16,fp8,0,0.029039999842643736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,2,128,1,fp8,fp8,0,0.029102399945259094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,4,128,1,float16,float16,0,0.03022719919681549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,4,128,1,fp8,fp8,0,0.02922559976577759
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,4,128,1,float16,fp8,0,0.029104000329971312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,8,128,1,float16,float16,0,0.031020799279212953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,8,128,1,float16,fp8,0,0.029016000032424927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,8,128,1,fp8,fp8,0,0.028984001278877257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,40,128,1,float16,float16,0,0.0228752002120018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,40,128,1,float16,fp8,0,0.024833600223064422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,40,128,1,fp8,fp8,0,0.024822400510311128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,1,128,1,float16,float16,0,0.02054239958524704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,1,128,1,float16,fp8,0,0.02072799950838089
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,2,128,1,float16,float16,0,0.02072640061378479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,2,128,1,float16,fp8,0,0.020577600598335265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,2,128,1,fp8,fp8,0,0.020608000457286835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,4,128,1,float16,float16,0,0.020739200711250304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,4,128,1,float16,fp8,0,0.020555199682712556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,40,4,128,1,fp8,fp8,0,0.04773440062999725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,4,128,1,fp8,fp8,0,0.020321600139141083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,8,128,1,float16,float16,0,0.02073120027780533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,8,128,1,float16,fp8,0,0.02067520022392273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,8,128,1,fp8,fp8,0,0.020641599595546723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,40,128,1,float16,float16,0,0.016689600050449373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,40,128,1,fp8,fp8,0,0.017115199565887453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,1,128,1,float16,float16,0,0.015140800178050995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,1,128,1,float16,fp8,0,0.014996799826622009
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,1,128,1,fp8,fp8,0,0.014699199795722961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,2,128,1,float16,float16,0,0.015905599296092986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,2,128,1,float16,fp8,0,0.014988799393177033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,2,128,1,fp8,fp8,0,0.015376000106334687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,4,128,1,float16,float16,0,0.014601600170135499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,4,128,1,float16,fp8,0,0.014587199687957764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,4,128,1,fp8,fp8,0,0.014571200311183929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,8,128,1,float16,float16,0,0.016497600078582763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,8,128,1,float16,fp8,0,0.01462399959564209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,40,1,128,1,fp8,fp8,0,0.020716799795627593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,8,128,1,fp8,fp8,0,0.014985600113868713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,40,128,1,float16,float16,0,0.01252480000257492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,40,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,40,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,1,128,1,float16,float16,0,0.011128000169992446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,1,128,1,float16,fp8,0,0.012892800569534301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,1,128,1,fp8,fp8,0,0.01146399974822998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,2,128,1,float16,float16,0,0.012368000298738479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,2,128,1,float16,fp8,0,0.0118928000330925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,2,128,1,fp8,fp8,0,0.012468799948692322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,4,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,4,128,1,fp8,fp8,0,0.012089599668979645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,8,128,1,float16,float16,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,8,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,8,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,40,40,128,1,float16,fp8,0,0.017793600261211396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,40,128,1,float16,float16,0,0.012392000108957291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,40,128,1,float16,fp8,0,0.01122559979557991
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,40,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,1,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,1,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,1,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,2,128,1,float16,float16,0,0.010982400178909302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,40,40,128,1,fp8,fp8,0,0.037859201431274414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,2,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,2,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,4,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,4,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,8,128,1,float16,float16,0,0.01111999973654747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,8,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,8,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,1,128,1,float16,float16,0,0.2328671932220459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,1,128,1,float16,fp8,0,0.23592638969421387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,1,128,1,fp8,fp8,0,0.2357952117919922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,40,4,128,1,float16,fp8,0,0.012558400630950928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,2,128,1,float16,float16,0,0.23289120197296143
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,2,128,1,fp8,fp8,0,0.23571839332580566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,4,128,1,float16,float16,0,0.23781919479370117
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,4,128,1,float16,fp8,0,0.23533279895782472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,40,4,128,1,float16,float16,0,0.011691199988126755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,4,128,1,fp8,fp8,0,0.23571999073028566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,8,128,1,float16,float16,0,0.2459264039993286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,8,128,1,float16,fp8,0,0.23564159870147705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,8,128,1,fp8,fp8,0,0.23557279109954835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,40,128,1,float16,float16,0,0.1642303943634033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,40,128,1,float16,fp8,0,0.15584640502929686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,40,128,1,fp8,fp8,0,0.15580320358276367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,1,128,1,float16,float16,0,0.1226032018661499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,1,128,1,float16,fp8,0,0.12315520048141479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,1,128,1,fp8,fp8,0,0.12313439846038818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,2,128,1,float16,float16,0,0.1224176049232483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,2,128,1,float16,fp8,0,0.12321439981460572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,2,128,1,fp8,fp8,0,0.12308000326156616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,4,128,1,float16,float16,0,0.12534079551696778
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,4,128,1,float16,fp8,0,0.12303999662399293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,4,128,1,fp8,fp8,0,0.12314560413360595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,8,128,1,float16,float16,0,0.1292240023612976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,40,2,128,1,float16,fp8,0,0.2356895923614502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,8,128,1,float16,fp8,0,0.12306720018386841
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,40,128,1,float16,float16,0,0.08644480109214783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,40,128,1,fp8,fp8,0,0.08212479948997498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,1,128,1,float16,float16,0,0.06571040153503419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,1,128,1,float16,fp8,0,0.06378080248832703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,1,128,1,fp8,fp8,0,0.06438080072402955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,2,128,1,float16,float16,0,0.06518399715423584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,2,128,1,float16,fp8,0,0.0637391984462738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,2,128,1,fp8,fp8,0,0.06369119882583618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,4,128,1,float16,float16,0,0.06786720156669616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,4,128,1,float16,fp8,0,0.06371679902076721
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,4,128,1,fp8,fp8,0,0.0637935996055603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,8,128,1,float16,float16,0,0.06991040110588073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,8,128,1,float16,fp8,0,0.06416000127792358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,8,128,1,fp8,fp8,0,0.06377440094947814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,40,128,1,float16,float16,0,0.049439999461174014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,40,128,1,float16,fp8,0,0.0454367995262146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,40,128,1,fp8,fp8,0,0.045393601059913635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,1,128,1,float16,float16,0,0.036977601051330564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,1,128,1,fp8,fp8,0,0.037001600861549376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,2,128,1,float16,float16,0,0.037088000774383546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,2,128,1,float16,fp8,0,0.03699199855327606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,2,128,1,fp8,fp8,0,0.03706560134887695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,4,128,1,float16,float16,0,0.03714079856872558
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,4,128,1,float16,fp8,0,0.037041598558425905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,4,128,1,fp8,fp8,0,0.036985599994659425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,8,128,1,float16,float16,0,0.03912320137023926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,8,128,1,float16,fp8,0,0.036955198645591734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,40,8,128,1,fp8,fp8,0,0.12309119701385499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,40,128,1,float16,float16,0,0.026764801144599913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,40,40,128,1,float16,fp8,0,0.08214399814605713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,40,128,1,float16,fp8,0,0.02685439884662628
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,40,128,1,fp8,fp8,0,0.02699359953403473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,1,128,1,float16,float16,0,0.02292640060186386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,1,128,1,float16,fp8,0,0.022852799296379088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,1,128,1,fp8,fp8,0,0.022711999714374542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,2,128,1,float16,float16,0,0.023345600068569183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,2,128,1,float16,fp8,0,0.022737599909305573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,2,128,1,fp8,fp8,0,0.023603199422359465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,4,128,1,float16,float16,0,0.024753600358963013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,4,128,1,float16,fp8,0,0.023468799889087677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,4,128,1,fp8,fp8,0,0.022724799811840057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,8,128,1,float16,float16,0,0.024820800125598907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,8,128,1,float16,fp8,0,0.022720000147819518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,40,128,1,float16,float16,0,0.018644799292087556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,40,8,128,1,fp8,fp8,0,0.02337439954280853
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,40,128,1,float16,fp8,0,0.018615999817848207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,40,128,1,fp8,fp8,0,0.018646399676799773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,1,128,1,float16,float16,0,0.016575999557971954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,1,128,1,float16,fp8,0,0.016651199758052827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,1,128,1,fp8,fp8,0,0.016705599427223206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,2,128,1,float16,float16,0,0.016704000532627106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,2,128,1,float16,fp8,0,0.016505600512027742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,1,128,1,float16,fp8,0,0.03710399866104126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,2,128,1,fp8,fp8,0,0.016808000206947327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,4,128,1,float16,float16,0,0.016531200706958772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,4,128,1,float16,fp8,0,0.01658879965543747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,4,128,1,fp8,fp8,0,0.01666080057621002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,8,128,1,float16,float16,0,0.01666080057621002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,8,128,1,float16,fp8,0,0.01671999990940094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,40,8,128,1,fp8,fp8,0,0.01664000004529953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,40,128,1,float16,float16,0,0.014523200690746307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,40,128,1,float16,fp8,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,40,128,1,fp8,fp8,0,0.014532800018787383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,1,128,1,float16,float16,0,0.013358399271965027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,1,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,1,128,1,fp8,fp8,0,0.012588800489902496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,2,128,1,float16,fp8,0,0.01290079951286316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,2,128,1,fp8,fp8,0,0.012636800110340119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,4,128,1,float16,float16,0,0.013374400138854981
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,4,128,1,float16,fp8,0,0.012604799866676331
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,4,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,8,128,1,float16,float16,0,0.014534400403499603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,8,128,1,float16,fp8,0,0.014276799559593201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,40,8,128,1,fp8,fp8,0,0.03699359893798828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,40,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,40,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,40,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,1,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,1,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,1,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,2,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,2,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,2,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,4,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,4,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,4,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,8,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,8,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,40,8,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,40,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,2,128,1,float16,float16,0,0.01372160017490387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,40,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,1,128,1,float16,float16,0,0.010355199873447418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,40,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,1,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,2,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,2,128,1,float16,fp8,0,0.010847999900579452
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,2,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,4,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,4,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,4,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,8,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,8,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,40,8,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,40,8,128,1,fp8,fp8,0,0.014584000408649444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,1,128,1,float16,float16,0,0.19868639707565308
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,1,128,1,fp8,fp8,0,0.19352320432662964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,2,128,1,float16,float16,0,0.19880319833755494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,2,128,1,float16,fp8,0,0.19286079406738282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,2,128,1,fp8,fp8,0,0.19275200366973877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,4,128,1,float16,float16,0,0.20089919567108155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,4,128,1,float16,fp8,0,0.1928671956062317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,4,128,1,fp8,fp8,0,0.19303679466247559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,8,128,1,float16,float16,0,0.20495200157165527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,8,128,1,float16,fp8,0,0.19300800561904907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,8,128,1,fp8,fp8,0,0.19279839992523193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,40,128,1,float16,float16,0,0.12361760139465332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,40,128,1,float16,fp8,0,0.11871520280838013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,40,128,1,fp8,fp8,0,0.11870720386505126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,1,128,1,float16,float16,0,0.104584002494812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,1,128,1,float16,fp8,0,0.10062880516052246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,1,128,1,fp8,fp8,0,0.10055840015411377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,2,128,1,float16,float16,0,0.1047584056854248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,2,128,1,float16,fp8,0,0.10050879716873169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,2,128,1,fp8,fp8,0,0.1005552053451538
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,4,128,1,float16,float16,0,0.10674400329589843
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,4,128,1,float16,fp8,0,0.10050879716873169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,4,128,1,fp8,fp8,0,0.10060479640960693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,8,128,1,float16,float16,0,0.10878080129623413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,8,128,1,float16,fp8,0,0.10052000284194947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,40,8,128,1,fp8,fp8,0,0.10054559707641601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,40,128,1,float16,float16,0,0.06837279796600342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,40,128,1,float16,fp8,0,0.061913597583770755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,40,128,1,fp8,fp8,0,0.06171360015869141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,1,128,1,float16,float16,0,0.05553920269012451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,1,128,1,float16,fp8,0,0.05339679718017578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,2,128,1,float16,float16,0,0.05554080009460449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,2,128,1,float16,fp8,0,0.05341280102729797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,2,128,1,fp8,fp8,0,0.053508800268173215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,4,128,1,float16,float16,0,0.05724800229072571
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,4,128,1,float16,fp8,0,0.05355679988861084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,4,128,1,fp8,fp8,0,0.05350720286369324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,40,1,128,1,float16,fp8,0,0.1928671956062317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,8,128,1,float16,float16,0,0.057608002424240114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,8,128,1,float16,fp8,0,0.05348479747772217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,8,128,1,fp8,fp8,0,0.053625601530075076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,40,128,1,float16,float16,0,0.03513759970664978
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,40,128,1,float16,fp8,0,0.03506399989128113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,40,128,1,fp8,fp8,0,0.03508960008621216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,1,128,1,float16,fp8,0,0.030883198976516722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,1,128,1,float16,float16,0,0.03314239978790283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,2,128,1,float16,float16,0,0.03291040062904358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,2,128,1,float16,fp8,0,0.03143840134143829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,2,128,1,fp8,fp8,0,0.0309935986995697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,4,128,1,float16,float16,0,0.033004799485206605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,4,128,1,float16,fp8,0,0.03099839985370636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,4,128,1,fp8,fp8,0,0.0313647985458374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,8,128,1,float16,float16,0,0.03310559988021851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,8,128,1,float16,fp8,0,0.03136799931526184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,8,128,1,fp8,fp8,0,0.030964800715446474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,40,128,1,float16,float16,0,0.022787199914455415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,40,128,1,fp8,fp8,0,0.022814400494098663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,1,128,1,float16,float16,0,0.020776000618934632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,40,1,128,1,fp8,fp8,0,0.053521597385406496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,1,128,1,float16,fp8,0,0.02072799950838089
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,1,128,1,fp8,fp8,0,0.02064799964427948
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,2,128,1,float16,float16,0,0.02083359956741333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,2,128,1,float16,fp8,0,0.02062239944934845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,40,1,128,1,fp8,fp8,0,0.030958399176597595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,2,128,1,fp8,fp8,0,0.020744000375270844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,4,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,4,128,1,float16,fp8,0,0.020791999995708466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,4,128,1,fp8,fp8,0,0.020657600462436677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,8,128,1,float16,float16,0,0.022700800001621245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,8,128,1,float16,fp8,0,0.020667199790477753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,8,128,1,fp8,fp8,0,0.020640000700950623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,40,128,1,float16,float16,0,0.01658399999141693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,40,128,1,float16,fp8,0,0.0165120005607605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,40,128,1,fp8,fp8,0,0.01656319946050644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,1,128,1,float16,float16,0,0.016551999747753142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,1,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,1,128,1,fp8,fp8,0,0.01465120017528534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,2,128,1,float16,float16,0,0.014579200744628906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,2,128,1,float16,fp8,0,0.014699199795722961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,2,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,4,128,1,float16,float16,0,0.01650879979133606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,40,40,128,1,float16,fp8,0,0.022648000717163087
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,4,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,4,128,1,fp8,fp8,0,0.014670400321483612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,8,128,1,float16,float16,0,0.014604799449443817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,8,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,40,128,1,float16,float16,0,0.01449120044708252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,40,128,1,float16,fp8,0,0.012444800138473511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,40,128,1,fp8,fp8,0,0.01242400035262108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,1,128,1,float16,float16,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,1,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,1,128,1,fp8,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,2,128,1,float16,float16,0,0.012508800625801087
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,2,128,1,float16,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,2,128,1,fp8,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,4,128,1,float16,float16,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,4,128,1,float16,fp8,0,0.01244800016283989
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,4,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,8,128,1,float16,float16,0,0.012465599924325943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,8,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,40,8,128,1,fp8,fp8,0,0.012476799637079239
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,40,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,40,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,40,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,1,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,1,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,2,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,2,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,2,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,4,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,4,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,4,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,8,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,8,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,8,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,40,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,40,128,1,float16,fp8,0,0.009724800288677216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,1,128,1,float16,float16,0,0.009601599723100662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,40,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,1,128,1,float16,fp8,0,0.008441600203514098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,40,8,128,1,fp8,fp8,0,0.014620800316333771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,2,128,1,float16,float16,0,0.009465599805116654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,2,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,2,128,1,fp8,fp8,0,0.008907199651002885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,4,128,1,float16,float16,0,0.010375999659299851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,4,128,1,float16,fp8,0,0.008542399853467941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,4,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,8,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,8,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,8,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,1,128,1,float16,float16,0,0.18761440515518188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,1,128,1,float16,fp8,0,0.17648799419403077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,1,128,1,fp8,fp8,0,0.17650560140609742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,2,128,1,float16,float16,0,0.18733760118484497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,40,1,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,2,128,1,float16,fp8,0,0.17690720558166503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,2,128,1,fp8,fp8,0,0.1773584008216858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,4,128,1,float16,float16,0,0.1904703974723816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,4,128,1,float16,fp8,0,0.1779296040534973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,4,128,1,fp8,fp8,0,0.17812800407409668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,8,128,1,float16,float16,0,0.19269440174102784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,8,128,1,float16,fp8,0,0.17833919525146485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,40,128,1,float16,float16,0,0.1111680030822754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,40,128,1,float16,fp8,0,0.10264159440994262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,40,128,1,fp8,fp8,0,0.10059200525283814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,1,128,1,float16,float16,0,0.09855999946594238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,1,128,1,float16,fp8,0,0.09237120151519776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,1,128,1,fp8,fp8,0,0.09243999719619751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,2,128,1,float16,float16,0,0.09850720167160035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,2,128,1,float16,fp8,0,0.09250400066375733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,2,128,1,fp8,fp8,0,0.09236800074577331
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,4,128,1,float16,float16,0,0.09850720167160035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,4,128,1,float16,fp8,0,0.0923807978630066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,4,128,1,fp8,fp8,0,0.09245759844779969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,8,128,1,float16,float16,0,0.09889119863510132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,8,128,1,float16,fp8,0,0.09237599968910218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,40,1,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,40,8,128,1,fp8,fp8,0,0.0923520028591156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,40,128,1,float16,float16,0,0.05752320289611816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,40,128,1,float16,fp8,0,0.055452799797058104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,40,128,1,fp8,fp8,0,0.05544319748878479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,1,128,1,float16,float16,0,0.053431999683380124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,1,128,1,float16,fp8,0,0.05134720206260681
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,1,128,1,fp8,fp8,0,0.05143359899520874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,2,128,1,float16,float16,0,0.05398719906806946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,2,128,1,fp8,fp8,0,0.05134720206260681
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,4,128,1,float16,float16,0,0.05383520126342774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,4,128,1,float16,fp8,0,0.05140640139579773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,4,128,1,fp8,fp8,0,0.05139679908752441
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,8,128,1,float16,float16,0,0.05550240278244019
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,8,128,1,float16,fp8,0,0.051369601488113405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,8,128,1,fp8,fp8,0,0.05133919715881348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,40,128,1,float16,float16,0,0.03297280073165894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,40,8,128,1,fp8,fp8,0,0.17764159440994262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,40,128,1,fp8,fp8,0,0.0324864000082016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,1,128,1,float16,float16,0,0.03096800148487091
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,1,128,1,float16,fp8,0,0.028969600796699524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,1,128,1,fp8,fp8,0,0.029318401217460634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,2,128,1,float16,float16,0,0.03091840147972107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,2,128,1,float16,fp8,0,0.029206401109695433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,2,128,1,fp8,fp8,0,0.029452800750732422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,4,128,1,float16,float16,0,0.03088639974594116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,4,128,1,float16,fp8,0,0.028984001278877257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,4,128,1,fp8,fp8,0,0.030729600787162782
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,8,128,1,float16,float16,0,0.031222400069236756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,8,128,1,float16,fp8,0,0.0306768000125885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,40,2,128,1,float16,fp8,0,0.05146080255508423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,8,128,1,fp8,fp8,0,0.030803200602531434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,40,128,1,float16,float16,0,0.022763200104236603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,40,128,1,float16,fp8,0,0.020686399936676026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,40,128,1,fp8,fp8,0,0.020745599269866945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,1,128,1,float16,float16,0,0.020633600652217865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,1,128,1,float16,fp8,0,0.01958879977464676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,1,128,1,fp8,fp8,0,0.01865919977426529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,2,128,1,float16,float16,0,0.020734399557113647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,2,128,1,float16,fp8,0,0.018718400597572328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,2,128,1,fp8,fp8,0,0.019734400510787963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,4,128,1,float16,float16,0,0.020630399882793426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,4,128,1,float16,fp8,0,0.019761599600315094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,4,128,1,fp8,fp8,0,0.020556800067424774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,8,128,1,float16,float16,0,0.02061759978532791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,8,128,1,float16,fp8,0,0.019828799366950988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,40,8,128,1,fp8,fp8,0,0.020003199577331543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,40,128,1,float16,float16,0,0.016595199704170227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,40,128,1,float16,fp8,0,0.01464959979057312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,40,40,128,1,float16,fp8,0,0.03235679864883423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,40,128,1,fp8,fp8,0,0.014553600549697876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,1,128,1,float16,float16,0,0.014608000218868256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,1,128,1,fp8,fp8,0,0.014508800208568573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,2,128,1,float16,float16,0,0.014601600170135499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,1,128,1,float16,fp8,0,0.014521600306034088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,2,128,1,float16,fp8,0,0.01475680023431778
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,2,128,1,fp8,fp8,0,0.014476799964904785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,4,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,8,128,1,float16,float16,0,0.014534400403499603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,4,128,1,fp8,fp8,0,0.014747199416160584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,8,128,1,fp8,fp8,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,40,128,1,float16,float16,0,0.012780800461769104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,40,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,40,128,1,fp8,fp8,0,0.012611199915409089
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,1,128,1,float16,float16,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,1,128,1,float16,fp8,0,0.012591999769210816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,1,128,1,fp8,fp8,0,0.012444800138473511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,2,128,1,float16,fp8,0,0.0124208003282547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,2,128,1,fp8,fp8,0,0.012465599924325943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,4,128,1,float16,float16,0,0.012408000230789185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,4,128,1,float16,fp8,0,0.012415999919176102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,4,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,8,128,1,float16,float16,0,0.012457600235939026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,8,128,1,float16,fp8,0,0.012507200241088867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,8,128,1,fp8,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,40,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,40,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,40,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,1,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,1,128,1,float16,fp8,0,0.009356799721717834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,1,128,1,fp8,fp8,0,0.009471999853849411
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,2,128,1,float16,float16,0,0.010304000228643417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,2,128,1,float16,fp8,0,0.009292799979448318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,2,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,4,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,4,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,4,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,4,128,1,float16,float16,0,0.014579200744628906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,8,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,8,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,40,8,128,1,float16,fp8,0,0.01451680064201355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,40,8,128,1,fp8,fp8,0,0.009563200175762177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,40,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,40,128,1,float16,fp8,0,0.008376000076532364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,40,128,1,fp8,fp8,0,0.00867839977145195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,1,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,1,128,1,fp8,fp8,0,0.008404800295829773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,2,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,2,128,1,fp8,fp8,0,0.01032159999012947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,4,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,40,2,128,1,float16,float16,0,0.01271360069513321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,4,128,1,float16,fp8,0,0.009369599819183349
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,4,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,8,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,8,128,1,float16,fp8,0,0.010311999917030334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,8,128,1,fp8,fp8,0,0.010318399965763092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,1,128,1,float16,float16,0,0.18500959873199463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,1,128,1,float16,fp8,0,0.17216960191726685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,1,128,1,fp8,fp8,0,0.17216960191726685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,2,128,1,float16,float16,0,0.18497439622879028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,2,128,1,float16,fp8,0,0.1722591996192932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,2,128,1,fp8,fp8,0,0.17235360145568848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,4,128,1,float16,float16,0,0.18515039682388307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,4,128,1,float16,fp8,0,0.17227519750595094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,1,128,1,float16,fp8,0,0.00891520008444786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,4,128,1,fp8,fp8,0,0.1721776008605957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,40,2,128,1,float16,fp8,0,0.008472000062465668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,8,128,1,float16,float16,0,0.18655680418014525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,8,128,1,float16,fp8,0,0.17230720520019532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,40,8,128,1,fp8,fp8,0,0.1721824049949646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,0,0.09148960113525391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,40,128,1,fp8,fp8,0,0.09036480188369751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,1,128,1,float16,float16,0,0.09847999811172485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,1,128,1,float16,fp8,0,0.0902944028377533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,1,128,1,fp8,fp8,0,0.0902783989906311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,2,128,1,float16,float16,0,0.09691359996795654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,2,128,1,float16,fp8,0,0.09036160111427308
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,2,128,1,fp8,fp8,0,0.09035360217094421
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,4,128,1,float16,float16,0,0.09793599843978881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,4,128,1,float16,fp8,0,0.09030240178108215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,4,128,1,fp8,fp8,0,0.09032480120658874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,8,128,1,float16,float16,0,0.09685119986534119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,8,128,1,float16,fp8,0,0.0903760015964508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,8,128,1,fp8,fp8,0,0.0903760015964508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,0,0.053793597221374514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,0,0.05014879703521728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,40,128,1,fp8,fp8,0,0.04978399872779846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,1,128,1,float16,float16,0,0.05344480276107788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,1,128,1,float16,fp8,0,0.04951359927654266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,1,128,1,fp8,fp8,0,0.049430400133132935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,2,128,1,float16,float16,0,0.05351999998092651
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,2,128,1,float16,fp8,0,0.0493120014667511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,2,128,1,fp8,fp8,0,0.05048480033874512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,4,128,1,float16,float16,0,0.05342879891395569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,4,128,1,float16,fp8,0,0.0504800021648407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,4,128,1,fp8,fp8,0,0.049404799938201904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,8,128,1,float16,float16,0,0.05348160266876221
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,8,128,1,float16,fp8,0,0.04936639964580536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,40,8,128,1,fp8,fp8,0,0.05026879906654358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,0,0.031543999910354614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,0,0.030022400617599487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,40,128,1,fp8,fp8,0,0.028880000114440918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,1,128,1,float16,fp8,0,0.028891199827194215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,1,128,1,fp8,fp8,0,0.029145601391792297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,0,0.09854080080986023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,2,128,1,float16,fp8,0,0.028966400027275085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,2,128,1,fp8,fp8,0,0.029193601012229918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,4,128,1,float16,float16,0,0.030956798791885377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,4,128,1,float16,fp8,0,0.02911840081214905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,4,128,1,fp8,fp8,0,0.029020801186561584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,8,128,1,float16,float16,0,0.03103039860725403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,8,128,1,float16,fp8,0,0.02892960011959076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,8,128,1,fp8,fp8,0,0.029183998703956604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,0,0.020848000049591066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,0,0.018614399433135986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,40,128,1,fp8,fp8,0,0.01977279931306839
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,1,128,1,float16,float16,0,0.020609599351882935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,1,128,1,float16,fp8,0,0.019281600415706635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,1,128,1,fp8,fp8,0,0.0186256006360054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,2,128,1,float16,float16,0,0.020654399693012238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,2,128,1,float16,fp8,0,0.01860000044107437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,2,128,1,fp8,fp8,0,0.01929440051317215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,4,128,1,float16,float16,0,0.020662400126457214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,4,128,1,float16,fp8,0,0.018769599497318268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,4,128,1,fp8,fp8,0,0.018619200587272643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,8,128,1,float16,float16,0,0.020627200603485107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,8,128,1,fp8,fp8,0,0.01870400011539459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,0,0.01648000031709671
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,0,0.014665600657463074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,40,128,1,fp8,fp8,0,0.01459839940071106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,1,128,1,float16,float16,0,0.014612799882888794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,1,128,1,float16,fp8,0,0.01451520025730133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,1,128,1,fp8,fp8,0,0.014636799693107605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,2,128,1,float16,float16,0,0.014588800072669984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,2,128,1,float16,fp8,0,0.014608000218868256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,2,128,1,fp8,fp8,0,0.014638400077819825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,1,128,1,float16,float16,0,0.030980798602104186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,4,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,4,128,1,fp8,fp8,0,0.014481599628925323
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,8,128,1,float16,float16,0,0.014640000462532044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,8,128,1,float16,fp8,0,0.014448000490665436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,8,128,1,fp8,fp8,0,0.014470399916172027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,0,0.013955199718475341
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,0,0.012438400089740754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,40,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,1,128,1,float16,float16,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,1,128,1,float16,fp8,0,0.012406399846076966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,1,128,1,fp8,fp8,0,0.012417600303888322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,2,128,1,float16,float16,0,0.01247519999742508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,2,128,1,float16,fp8,0,0.012411200255155564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,2,128,1,fp8,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,4,128,1,float16,float16,0,0.012403199821710587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,4,128,1,float16,fp8,0,0.012412799894809723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,4,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,40,4,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,8,128,1,float16,float16,0,0.012600000202655792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,40,2,128,1,float16,float16,0,0.030956798791885377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,8,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,0,0.010763200372457505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,40,128,1,fp8,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,1,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,1,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,2,128,1,float16,float16,0,0.010692799836397171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,40,8,128,1,float16,fp8,0,0.018606400489807128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,2,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,4,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,4,128,1,float16,fp8,0,0.008478400111198426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,4,128,1,fp8,fp8,0,0.008523199707269669
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,8,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,8,128,1,float16,fp8,0,0.008396799862384795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,8,128,1,fp8,fp8,0,0.008420799672603608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,0,0.010356800258159637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,0,0.00838399976491928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,40,128,1,fp8,fp8,0,0.008457600325345992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,1,128,1,float16,float16,0,0.009759999811649323
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,1,128,1,float16,fp8,0,0.009046400338411332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,1,128,1,fp8,fp8,0,0.008388800173997879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,2,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,40,8,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,2,128,1,fp8,fp8,0,0.008508799970149994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,2,128,1,float16,fp8,0,0.009220799803733826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,4,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,4,128,1,float16,fp8,0,0.008500800281763077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,4,128,1,fp8,fp8,0,0.008430399745702744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,8,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,8,128,1,float16,fp8,0,0.00840959995985031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,40,8,128,1,fp8,fp8,0,0.00950080007314682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,40,2,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,fp8,0,7.728556823730469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,1,128,1,fp8,fp8,0,7.890544128417969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,float16,0,10.256678771972656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,float16,0,10.260806274414062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,fp8,0,7.806086730957031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,2,128,1,fp8,fp8,0,7.9056236267089846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,fp8,0,7.785794830322265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,float16,0,10.515010833740234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,4,128,1,fp8,fp8,0,7.853099060058594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,float16,0,10.87105941772461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,8,128,1,fp8,fp8,0,8.018318176269531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,fp8,0,7.988625335693359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,fp8,0,4.148166275024414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,float16,0,4.884473419189453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,32,128,1,fp8,fp8,0,4.071163177490234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,fp8,0,3.9241199493408203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,1,128,1,fp8,fp8,0,3.9570560455322266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,float16,0,4.808451080322266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,fp8,0,4.012673568725586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,2,128,1,fp8,fp8,0,3.8851585388183594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,float16,0,5.1815185546875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,fp8,0,3.9235790252685545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,4,128,1,fp8,fp8,0,3.8730369567871095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,fp8,0,3.944249725341797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,float16,0,5.258659362792969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,8,128,1,fp8,fp8,0,3.927654266357422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,fp8,0,2.2961936950683595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,32,128,1,fp8,fp8,0,2.1180015563964845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,fp8,0,1.9732431411743163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,float16,0,2.555227279663086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,1,128,1,fp8,fp8,0,2.0279680252075196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,float16,0,2.767940711975098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,fp8,0,1.9918527603149414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,2,128,1,fp8,fp8,0,2.217100715637207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,float16,0,2.4352031707763673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,fp8,0,1.9914112091064453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,4,128,1,fp8,fp8,0,2.1766992568969727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,float16,0,2.4718448638916017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,fp8,0,1.9865232467651368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,8,128,1,fp8,fp8,0,2.3376079559326173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,fp8,0,1.085041618347168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,32,128,1,fp8,fp8,0,1.085428810119629
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,float16,0,1.2297552108764649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,fp8,0,1.08340482711792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,1,128,1,fp8,fp8,0,1.086736011505127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,float16,0,1.237508773803711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,fp8,0,1.0451663970947265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,2,128,1,fp8,fp8,0,1.0564831733703612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,float16,0,1.2449551582336427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,fp8,0,1.087928009033203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,4,128,1,fp8,fp8,0,1.1208319664001465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,float16,0,1.2641792297363281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,fp8,0,1.083847999572754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,8,128,1,fp8,fp8,0,1.0709903717041016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,float16,0,5.664091110229492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,fp8,0,4.546156692504883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,1,128,1,fp8,fp8,0,4.528619384765625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,float16,0,6.042244720458984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,fp8,0,4.695835113525391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,2,128,1,fp8,fp8,0,4.575680160522461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,float16,0,6.0299121856689455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,fp8,0,4.807656097412109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,4,128,1,fp8,fp8,0,4.519454574584961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,float16,0,6.092846298217774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,fp8,0,4.499552154541016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,32,8,128,1,fp8,fp8,0,4.504391860961914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,float16,0,2.9851024627685545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,float16,0,1.3332015991210937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,float16,0,2.7159727096557615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,float16,0,5.2929023742675785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,fp8,0,2.695964813232422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,32,128,1,fp8,fp8,0,2.420879936218262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,float16,0,2.9046352386474608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,fp8,0,2.375584030151367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,1,128,1,fp8,fp8,0,2.258087921142578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,float16,0,2.7984399795532227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,fp8,0,2.380318450927734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,2,128,1,fp8,fp8,0,2.2750335693359376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,float16,0,2.7470672607421873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,fp8,0,2.3795551300048827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,4,128,1,fp8,fp8,0,2.2678144454956053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,float16,0,2.900107192993164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,float16,0,1.5932671546936035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,fp8,0,2.5527040481567385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,32,8,128,1,fp8,fp8,0,2.2656816482543944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,fp8,0,1.330031967163086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,32,128,1,fp8,fp8,0,1.3500720024108888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,fp8,0,1.1801136016845704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,float16,0,1.3878064155578613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,1,128,1,fp8,fp8,0,1.167636775970459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,float16,0,1.4486960411071776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,fp8,0,1.160324764251709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,2,128,1,fp8,fp8,0,1.1901887893676757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,float16,0,1.4234848022460938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,fp8,0,1.376352024078369
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,4,128,1,fp8,fp8,0,1.1600943565368653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,float16,0,1.436019229888916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,fp8,0,1.1764816284179687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,float16,0,0.8140959739685059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,fp8,0,0.7329855918884277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,32,8,128,1,fp8,fp8,0,1.1893648147583007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,32,128,1,fp8,fp8,0,0.6627984046936035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,float16,0,0.7345248222351074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,fp8,0,0.6908880233764648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,1,128,1,fp8,fp8,0,0.6174895763397217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,float16,0,0.7393343925476075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,fp8,0,0.6343584060668945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,2,128,1,fp8,fp8,0,0.614790391921997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,float16,0,0.7645055770874023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,fp8,0,0.6557663917541504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,4,128,1,fp8,fp8,0,0.628334379196167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,float16,0,0.7445615768432617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,fp8,0,0.6197743892669678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,32,8,128,1,fp8,fp8,0,0.6542863845825195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,fp8,0,3.1918304443359373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,1,128,1,fp8,fp8,0,3.2571743011474608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,float16,0,4.0104209899902346
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,float16,0,4.113766479492187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,fp8,0,3.2147567749023436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,2,128,1,fp8,fp8,0,3.2532958984375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,fp8,0,3.308193588256836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,float16,0,4.378247833251953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,4,128,1,fp8,fp8,0,3.2501007080078126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,float16,0,4.353750228881836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,fp8,0,3.293489456176758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,32,8,128,1,fp8,fp8,0,3.605681610107422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,float16,0,2.1933727264404297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,fp8,0,1.9293504714965821
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,32,128,1,fp8,fp8,0,1.7851455688476563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,float16,0,1.927622413635254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,fp8,0,1.7660160064697266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,1,128,1,fp8,fp8,0,1.6430992126464843
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,float16,0,1.9486112594604492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,2,128,1,fp8,fp8,0,1.6263887405395507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,fp8,0,1.7856752395629882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,float16,0,2.076464080810547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,fp8,0,1.9119199752807616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,4,128,1,fp8,fp8,0,1.624305534362793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,float16,0,1.9833808898925782
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,float16,0,1.1314559936523438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,fp8,0,0.997214412689209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,8,128,1,fp8,fp8,0,1.6407567977905273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,fp8,0,2.064076805114746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,32,128,1,fp8,fp8,0,0.992743968963623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,fp8,0,0.8384336471557617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,float16,0,1.01834077835083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,1,128,1,fp8,fp8,0,0.8601344108581543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,float16,0,1.0077648162841797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,2,128,1,fp8,fp8,0,0.8418944358825684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,float16,0,1.0428688049316406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,fp8,0,0.9355104446411133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,4,128,1,fp8,fp8,0,0.8465120315551757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,float16,0,1.0634127616882325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,8,128,1,fp8,fp8,0,0.8416272163391113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,float16,0,0.613646411895752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,fp8,0,0.4872623920440674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,32,128,1,fp8,fp8,0,0.5280511856079102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,float16,0,0.5253439903259277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,fp8,0,0.48032798767089846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,fp8,0,0.8532352447509766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,1,128,1,fp8,fp8,0,0.44873919486999514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,float16,0,0.548086404800415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,fp8,0,0.44818878173828125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,2,128,1,fp8,fp8,0,0.44916319847106934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,fp8,0,0.4488992214202881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,4,128,1,fp8,fp8,0,0.4492496013641357
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,fp8,0,0.9423151969909668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,float16,0,0.5431471824645996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,fp8,0,0.44936161041259765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,8,128,1,fp8,fp8,0,0.4588175773620605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,float16,0,0.5481391906738281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,fp8,0,4.216398239135742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,float16,0,5.194081497192383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,1,128,1,fp8,fp8,0,4.26020622253418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,fp8,0,4.191100692749023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,float16,0,5.341966247558593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,2,128,1,fp8,fp8,0,4.634454345703125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,float16,0,5.484872055053711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,fp8,0,4.236441421508789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,4,128,1,fp8,fp8,0,4.279328155517578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,float16,0,5.332732772827148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,fp8,0,4.365958404541016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,float16,0,2.9517120361328124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,32,8,128,1,fp8,fp8,0,4.250759887695312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,fp8,0,2.5521488189697266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,32,128,1,fp8,fp8,0,2.3864719390869142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,fp8,0,2.138006401062012
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,float16,0,2.544910430908203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,1,128,1,fp8,fp8,0,2.127299118041992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,float16,0,2.679177665710449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,fp8,0,2.1210784912109375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,2,128,1,fp8,fp8,0,2.2215648651123048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,float16,0,2.5646383285522463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,fp8,0,2.2608192443847654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,4,128,1,fp8,fp8,0,2.118916893005371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,float16,0,2.6226911544799805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,fp8,0,2.63195686340332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,float16,0,1.486684799194336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,32,8,128,1,fp8,fp8,0,2.1433488845825197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,fp8,0,1.469707202911377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,32,128,1,fp8,fp8,0,1.215550422668457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,float16,0,1.3051631927490235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,fp8,0,1.132254409790039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,1,128,1,fp8,fp8,0,1.1315711975097655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,float16,0,1.2691920280456543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,fp8,0,1.2136927604675294
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,2,128,1,fp8,fp8,0,1.0989295959472656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,float16,0,1.3066752433776856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,fp8,0,1.1829919815063477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,4,128,1,fp8,fp8,0,1.1304816246032714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,float16,0,1.3657024383544922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,fp8,0,1.258899211883545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,32,8,128,1,fp8,fp8,0,1.1861136436462403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,float16,0,0.7757743835449219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,fp8,0,0.6781968116760254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,32,128,1,fp8,fp8,0,0.6261392116546631
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,float16,0,0.6584559917449951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,fp8,0,0.5651391983032227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,1,128,1,fp8,fp8,0,0.6759488105773925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,float16,0,0.6725520133972168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,fp8,0,0.5735375881195068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,2,128,1,fp8,fp8,0,0.5818175792694091
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,fp8,0,0.5766704082489014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,4,128,1,fp8,fp8,0,0.5661104202270508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,float16,0,0.6869152069091797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,fp8,0,0.5765967845916748
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,8,128,1,fp8,fp8,0,0.568175983428955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,float16,0,0.4198319911956787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,fp8,0,0.3398848056793213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,32,128,1,fp8,fp8,0,0.3393376111984253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,float16,0,0.3616976022720337
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,fp8,0,0.3073472023010254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,1,128,1,fp8,fp8,0,0.3073632001876831
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,float16,0,0.3627007961273193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,fp8,0,0.3077039957046509
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,2,128,1,fp8,fp8,0,0.30584800243377686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,float16,0,0.3639807939529419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,fp8,0,0.30918240547180176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,4,128,1,fp8,fp8,0,0.30750720500946044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,float16,0,0.3676255941390991
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,fp8,0,0.3073920011520386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,32,8,128,1,fp8,fp8,0,0.3093087911605835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,float16,0,0.7554480075836182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,fp8,0,2.512985610961914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,1,128,1,fp8,fp8,0,2.499395179748535
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,float16,0,2.9945295333862303
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,float16,0,3.038944053649902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,fp8,0,2.507640075683594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,2,128,1,fp8,fp8,0,2.4984575271606446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,float16,0,3.0588096618652343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,fp8,0,2.640507125854492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,4,128,1,fp8,fp8,0,2.5099727630615236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,float16,0,3.031315231323242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,fp8,0,2.669606399536133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,32,8,128,1,fp8,fp8,0,2.5267295837402344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,float16,0,1.8016944885253907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,fp8,0,1.505460834503174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,32,128,1,fp8,fp8,0,1.4436800003051757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,float16,0,1.5195728302001954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,fp8,0,1.3886272430419921
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,1,128,1,fp8,fp8,0,1.274398422241211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,float16,0,1.477780818939209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,fp8,0,1.3077088356018067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,2,128,1,fp8,fp8,0,1.2689807891845704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,float16,0,1.5008064270019532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,fp8,0,1.312342357635498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,4,128,1,fp8,fp8,0,1.2799504280090332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,float16,0,1.5751232147216796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,fp8,0,1.2884016036987305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,32,8,128,1,fp8,fp8,0,1.2788895606994628
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,float16,0,0.9468527793884277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,32,128,1,fp8,fp8,0,0.7427023887634278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,float16,0,0.7610256195068359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,fp8,0,0.655188798904419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,1,128,1,fp8,fp8,0,0.6827663898468017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,float16,0,0.7773087978363037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,fp8,0,0.65447998046875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,2,128,1,fp8,fp8,0,0.654966402053833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,float16,0,0.7713679790496826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,fp8,0,0.657758378982544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,fp8,0,0.7420112133026123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,4,128,1,fp8,fp8,0,0.656057596206665
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,float16,0,0.7924111843109131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,fp8,0,0.6595007896423339
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,32,8,128,1,fp8,fp8,0,0.656879997253418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,float16,0,0.4822591781616211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,fp8,0,0.4058112144470215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,32,128,1,fp8,fp8,0,0.39419519901275635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,float16,0,0.39819200038909913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,fp8,0,0.34690079689025877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,1,128,1,fp8,fp8,0,0.36669120788574217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,float16,0,0.4111008167266846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,fp8,0,0.3479327917098999
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,2,128,1,fp8,fp8,0,0.3482127904891968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,float16,0,0.40825600624084474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,fp8,0,0.3538399934768677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,4,128,1,fp8,fp8,0,0.34788639545440675
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,float16,0,0.41849122047424314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,fp8,0,0.36616480350494385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,float16,0,0.26442880630493165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,32,8,128,1,fp8,fp8,0,0.3504479885101318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,fp8,0,0.21791360378265381
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,32,128,1,fp8,fp8,0,0.23572640419006347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,fp8,0,0.19313280582427977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,1,128,1,fp8,fp8,0,0.19337120056152343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,float16,0,0.23161919116973878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,2,128,1,fp8,fp8,0,0.1933632016181946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,float16,0,0.22201600074768066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,fp8,0,0.19347360134124755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,4,128,1,fp8,fp8,0,0.19290720224380492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,float16,0,0.22920799255371094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,8,128,1,fp8,fp8,0,0.19359999895095825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,float16,0,0.21891679763793945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,fp8,0,0.19271839857101442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,float16,0,2.8323104858398436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,fp8,0,0.1932927966117859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,fp8,0,2.433785629272461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,1,128,1,fp8,fp8,0,2.4450016021728516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,fp8,0,2.4352352142333986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,float16,0,3.0065168380737304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,2,128,1,fp8,fp8,0,2.436017608642578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,float16,0,2.9898176193237305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,fp8,0,2.4415632247924806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,4,128,1,fp8,fp8,0,2.49224967956543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,float16,0,2.96485595703125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,float16,0,1.8264751434326172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,fp8,0,2.7430288314819338
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,32,8,128,1,fp8,fp8,0,2.4512447357177733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,fp8,0,1.621272087097168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,32,128,1,fp8,fp8,0,1.469164752960205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,fp8,0,1.2309215545654297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,float16,0,1.4324416160583495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,1,128,1,fp8,fp8,0,1.2334367752075195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,fp8,0,1.2351519584655761
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,float16,0,1.4134032249450683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,2,128,1,fp8,fp8,0,1.3392080307006835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,float16,0,1.4248671531677246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,fp8,0,1.2344223976135253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,4,128,1,fp8,fp8,0,1.4134767532348633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,float16,0,1.5191216468811035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,float16,0,0.9232512474060058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,fp8,0,1.333681583404541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,32,8,128,1,fp8,fp8,0,1.2419967651367188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,fp8,0,0.8130304336547851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,32,128,1,fp8,fp8,0,0.7537888050079345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,fp8,0,0.6307151794433594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,1,128,1,fp8,fp8,0,0.678659200668335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,fp8,0,0.631166410446167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,float16,0,0.7188655853271484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,2,128,1,fp8,fp8,0,0.6331727981567383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,fp8,0,0.6427487850189209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,float16,0,0.7256239891052246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,4,128,1,fp8,fp8,0,0.6346879959106445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,fp8,0,0.6395919799804688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,8,128,1,fp8,fp8,0,0.6383503913879395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,float16,0,0.7638239860534668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,fp8,0,0.3932816028594971
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,32,128,1,fp8,fp8,0,0.41181120872497556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,float16,0,0.3798687934875488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,float16,0,0.710148811340332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,1,128,1,fp8,fp8,0,0.33518080711364745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,float16,0,0.3725584030151367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,fp8,0,0.335313606262207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,2,128,1,fp8,fp8,0,0.3304095983505249
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,float16,0,0.3896591901779175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,fp8,0,0.3321055889129639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,4,128,1,fp8,fp8,0,0.33183679580688474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,float16,0,0.40215678215026857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,float16,0,0.48970880508422854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,fp8,0,0.35663039684295655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,float16,0,0.25592639446258547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,fp8,0,0.21241281032562256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,32,128,1,fp8,fp8,0,0.2114784002304077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,float16,0,0.2017199993133545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,fp8,0,0.1796687960624695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,1,128,1,fp8,fp8,0,0.17992160320281983
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,float16,0,0.20298879146575927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,fp8,0,0.1803104043006897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,2,128,1,fp8,fp8,0,0.17917439937591553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,fp8,0,0.3341248035430908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,fp8,0,0.18137439489364623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,32,8,128,1,fp8,fp8,0,0.33265759944915774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,4,128,1,fp8,fp8,0,0.18078880310058593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,float16,0,0.2107919931411743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,fp8,0,0.18211040496826172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,8,128,1,fp8,fp8,0,0.18087999820709227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,fp8,0,0.121014404296875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,float16,0,0.14476799964904785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,32,128,1,fp8,fp8,0,0.12079520225524902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,float16,0,0.11591839790344238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,fp8,0,0.10470880270004272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,1,128,1,fp8,fp8,0,0.1045151948928833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,float16,0,0.1156383991241455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,fp8,0,0.10472320318222046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,2,128,1,fp8,fp8,0,0.10461440086364746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,float16,0,0.11772639751434326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,fp8,0,0.10469119548797608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,4,128,1,fp8,fp8,0,0.10468319654464722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,float16,0,0.12010079622268677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,fp8,0,0.1049232006072998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,float16,0,0.20529119968414306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,fp8,0,1.4978591918945312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,float16,0,1.6989215850830077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,1,128,1,fp8,fp8,0,1.4980640411376953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,fp8,0,1.516096019744873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,float16,0,1.6651103973388672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,32,8,128,1,fp8,fp8,0,0.10465600490570068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,2,128,1,fp8,fp8,0,1.500972843170166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,float16,0,1.7501583099365234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,4,128,1,fp8,fp8,0,1.5002816200256348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,float16,0,1.8022031784057617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,8,128,1,fp8,fp8,0,1.5066656112670898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,fp8,0,1.6833328247070312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,float16,0,1.1612048149108887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,fp8,0,0.9333328247070313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,32,128,1,fp8,fp8,0,0.9364784240722657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,fp8,0,1.642919921875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,fp8,0,0.7617472171783447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,float16,0,0.8486111640930176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,1,128,1,fp8,fp8,0,0.7626751899719239
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,fp8,0,0.7613999843597412
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,float16,0,0.8435824394226075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,2,128,1,fp8,fp8,0,0.8248496055603027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,float16,0,0.8881183624267578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,fp8,0,0.7638544082641602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,4,128,1,fp8,fp8,0,0.7636703968048095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,fp8,0,0.7761360168457031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,float16,0,0.5770287990570069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,8,128,1,fp8,fp8,0,0.766102409362793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,fp8,0,0.48899359703063966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,32,128,1,fp8,fp8,0,0.4801680088043213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,fp8,0,0.3928447961807251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,1,128,1,fp8,fp8,0,0.39377119541168215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,float16,0,0.4463024139404297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,fp8,0,0.39371840953826903
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,2,128,1,fp8,fp8,0,0.3954511880874634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,float16,0,0.44488158226013186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,fp8,0,0.3957616090774536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,4,128,1,fp8,fp8,0,0.39547200202941896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,float16,0,0.4744016170501709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,float16,0,0.9177200317382812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,fp8,0,0.3969487905502319
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,8,128,1,fp8,fp8,0,0.40210399627685545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,fp8,0,0.2541696071624756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,32,128,1,fp8,fp8,0,0.2578799962997437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,float16,0,0.22978560924530028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,fp8,0,0.20930719375610352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,1,128,1,fp8,fp8,0,0.20918560028076172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,float16,0,0.4303872108459473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,float16,0,0.23662080764770507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,fp8,0,0.20924000740051268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,2,128,1,fp8,fp8,0,0.2091968059539795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,float16,0,0.23967359066009522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,fp8,0,0.20957438945770263
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,4,128,1,fp8,fp8,0,0.20913119316101075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,float16,0,0.24978880882263182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,fp8,0,0.21090240478515626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,8,128,1,fp8,fp8,0,0.2104288101196289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,float16,0,0.16876159906387328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,fp8,0,0.1397215962409973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,32,128,1,fp8,fp8,0,0.1396399974822998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,float16,0,0.12812639474868776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,fp8,0,0.11657600402832032
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,1,128,1,fp8,fp8,0,0.11584960222244263
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,float16,0,0.1296895980834961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,fp8,0,0.11698559522628785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,2,128,1,fp8,fp8,0,0.1167248010635376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,float16,0,0.12958240509033203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,fp8,0,0.1167631983757019
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,4,128,1,fp8,fp8,0,0.11699039936065674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,float16,0,0.1370543956756592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,fp8,0,0.11697920560836791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,32,8,128,1,fp8,fp8,0,0.11756960153579712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,float16,0,0.09606879949569702
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,fp8,0,0.08054080009460449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,32,128,1,fp8,fp8,0,0.08015519976615906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,float16,0,0.07684320211410522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,float16,0,0.30411999225616454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,1,128,1,fp8,fp8,0,0.06999520063400269
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,float16,0,0.07605760097503662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,fp8,0,0.07020800113677979
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,2,128,1,fp8,fp8,0,0.06978560090065003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,float16,0,0.07773119807243348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,fp8,0,0.06980640292167664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,4,128,1,fp8,fp8,0,0.06987839937210083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,float16,0,0.0780239999294281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,fp8,0,0.06988639831542968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,8,128,1,fp8,fp8,0,0.06981760263442993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,fp8,0,1.5421232223510741
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,float16,0,1.6889055252075196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,fp8,0,0.07001439929008484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,1,128,1,fp8,fp8,0,1.5426783561706543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,float16,0,1.7362960815429687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,fp8,0,1.5417984008789063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,2,128,1,fp8,fp8,0,1.5438048362731933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,float16,0,1.7886480331420898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,fp8,0,1.5438176155090333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,4,128,1,fp8,fp8,0,1.5451295852661133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,float16,0,1.822804832458496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,float16,0,1.1934864044189453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,fp8,0,1.6625263214111328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,32,8,128,1,fp8,fp8,0,1.5524959564208984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,fp8,0,1.0743087768554687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,fp8,0,0.7807472229003907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,float16,0,0.8544511795043945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,32,128,1,fp8,fp8,0,1.0246687889099122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,1,128,1,fp8,fp8,0,0.7813280105590821
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,fp8,0,0.7892672061920166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,2,128,1,fp8,fp8,0,0.7826367855072022
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,float16,0,0.8692367553710938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,fp8,0,0.7830239772796631
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,4,128,1,fp8,fp8,0,0.782966423034668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,float16,0,0.9295295715332031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,8,128,1,fp8,fp8,0,0.7868847846984863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,float16,0,0.6234735965728759
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,fp8,0,0.5136608123779297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,float16,0,0.42603359222412107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,32,128,1,fp8,fp8,0,0.5364319801330566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,float16,0,0.8359408378601074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,fp8,0,0.4005583763122559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,1,128,1,fp8,fp8,0,0.400651216506958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,float16,0,0.42751197814941405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,fp8,0,0.40117440223693845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,2,128,1,fp8,fp8,0,0.40147199630737307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,float16,0,0.4417759895324707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,fp8,0,0.4019887924194336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,4,128,1,fp8,fp8,0,0.4018415927886963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,float16,0,0.4684271812438965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,8,128,1,fp8,fp8,0,0.40444002151489256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,fp8,0,0.788369607925415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,float16,0,0.31492478847503663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,fp8,0,0.26840159893035886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,32,128,1,fp8,fp8,0,0.26858880519866946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,float16,0,0.22459681034088136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,1,128,1,fp8,fp8,0,0.21099998950958251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,float16,0,0.2251823902130127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,fp8,0,0.21113760471343995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,2,128,1,fp8,fp8,0,0.2112368106842041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,float16,0,0.23581440448760987
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,fp8,0,0.21047680377960204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,4,128,1,fp8,fp8,0,0.21159679889678956
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,float16,0,0.244486403465271
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,fp8,0,0.21311519145965577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,8,128,1,fp8,fp8,0,0.21142880916595458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,float16,0,0.17082879543304444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,fp8,0,0.1444416046142578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,32,128,1,fp8,fp8,0,0.14566880464553833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,float16,0,0.12271360158920289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,fp8,0,0.11439839601516724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,1,128,1,fp8,fp8,0,0.11442400217056274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,float16,0,0.12494560480117797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,fp8,0,0.1141152024269104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,2,128,1,fp8,fp8,0,0.1148144006729126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,float16,0,0.1251744031906128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,fp8,0,0.11539679765701294
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,4,128,1,fp8,fp8,0,0.11491680145263672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,float16,0,0.13534560203552246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,fp8,0,0.115009605884552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,32,8,128,1,fp8,fp8,0,0.1167456030845642
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,float16,0,0.09611520171165466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,fp8,0,0.2105936050415039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,fp8,0,0.08216639757156372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,32,128,1,fp8,fp8,0,0.08178079724311829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,float16,0,0.0699504017829895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,fp8,0,0.0668720006942749
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,1,128,1,fp8,fp8,0,0.06672000288963317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,float16,0,0.07171840071678162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,fp8,0,0.06592640280723572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,2,128,1,fp8,fp8,0,0.06645600199699402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,float16,0,0.07185599803924561
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,fp8,0,0.06629440188407898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,4,128,1,fp8,fp8,0,0.06597279906272888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,float16,0,0.07480480074882508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,fp8,0,0.06673920154571533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,32,8,128,1,fp8,fp8,0,0.06688799858093261
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,float16,0,0.055379199981689456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,fp8,0,0.04963200092315674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,32,128,1,fp8,fp8,0,0.0494271993637085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,float16,0,0.04740000069141388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,fp8,0,0.04317919909954071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,1,128,1,fp8,fp8,0,0.04326240122318268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,float16,0,0.0463919997215271
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,fp8,0,0.04356000125408173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,2,128,1,fp8,fp8,0,0.04321120083332062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,float16,0,0.04742240011692047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,fp8,0,0.043172800540924074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,4,128,1,fp8,fp8,0,0.043886399269104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,float16,0,0.04763360023498535
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,fp8,0,0.04396960139274597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,32,8,128,1,fp8,fp8,0,0.04322080016136169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,fp8,0,0.4039584159851074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,float16,0,1.0532192230224608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,fp8,0,0.9908448219299316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,1,128,1,fp8,fp8,0,0.9911824226379394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,float16,0,1.0612480163574218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,fp8,0,0.9918671607971191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,2,128,1,fp8,fp8,0,0.9918767929077148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,fp8,0,0.9939071655273437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,float16,0,1.1209152221679688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,4,128,1,fp8,fp8,0,0.9939007759094238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,fp8,0,1.073761558532715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,8,128,1,fp8,fp8,0,0.9996767997741699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,float16,0,0.7883008003234864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,fp8,0,0.6721536159515381
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,float16,0,0.5268832206726074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,32,128,1,fp8,fp8,0,0.6716288089752197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,fp8,0,0.5060400009155274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,1,128,1,fp8,fp8,0,0.5044640064239502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,float16,0,0.5278751850128174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,fp8,0,0.506279993057251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,2,128,1,fp8,fp8,0,0.5049536228179932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,float16,0,0.548142385482788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,fp8,0,0.5082304000854492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,4,128,1,fp8,fp8,0,0.5063344001770019
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,float16,0,0.5856639862060546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,fp8,0,0.5107600212097168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,32,8,128,1,fp8,fp8,0,0.5090911865234375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,float16,0,1.1506863594055177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,float16,0,0.4030655860900879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,fp8,0,0.3464303970336914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,32,128,1,fp8,fp8,0,0.3457855939865112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,float16,0,0.27128000259399415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,1,128,1,fp8,fp8,0,0.26207358837127687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,float16,0,0.2755376100540161
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,fp8,0,0.261080002784729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,2,128,1,fp8,fp8,0,0.26231040954589846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,float16,0,0.28607840538024903
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,fp8,0,0.2619983911514282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,4,128,1,fp8,fp8,0,0.2627376079559326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,float16,0,0.3045232057571411
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,fp8,0,0.2640655994415283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,8,128,1,fp8,fp8,0,0.2644927978515625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,fp8,0,0.18253599405288695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,32,128,1,fp8,fp8,0,0.18261439800262452
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,float16,0,0.1451024055480957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,fp8,0,0.13916480541229248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,1,128,1,fp8,fp8,0,0.1394335985183716
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,float16,0,0.1483728051185608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,fp8,0,0.1392832040786743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,2,128,1,fp8,fp8,0,0.13961119651794435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,float16,0,0.14996479749679564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,fp8,0,0.13965920209884644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,4,128,1,fp8,fp8,0,0.13947839736938478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,float16,0,0.1640064001083374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,fp8,0,0.14041759967803955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,8,128,1,fp8,fp8,0,0.14111520051956178
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,float16,0,0.11535359621047973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,fp8,0,0.10061119794845581
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,32,128,1,fp8,fp8,0,0.10011680126190185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,float16,0,0.08215839862823486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,fp8,0,0.0764415979385376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,fp8,0,0.26122560501098635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,1,128,1,fp8,fp8,0,0.0777728021144867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,float16,0,0.08188319802284241
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,fp8,0,0.07714080214500427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,2,128,1,fp8,fp8,0,0.07757920026779175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,float16,0,0.0842095971107483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,fp8,0,0.07736960053443909
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,4,128,1,fp8,fp8,0,0.0777679979801178
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,float16,0,0.08874559998512269
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,8,128,1,fp8,fp8,0,0.07833279967308045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,float16,0,0.06766560077667236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,fp8,0,0.05751519799232483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,32,128,1,fp8,fp8,0,0.05622879862785339
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,float16,0,0.04980640113353729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,fp8,0,0.0473471999168396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,1,128,1,fp8,fp8,0,0.04727199971675873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,float16,0,0.2124176025390625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,fp8,0,0.047251200675964354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,2,128,1,fp8,fp8,0,0.047302401065826415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,float16,0,0.05076479911804199
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,fp8,0,0.04738079905509949
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,4,128,1,fp8,fp8,0,0.047312000393867494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,float16,0,0.05227680206298828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,fp8,0,0.04736160039901734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,8,128,1,fp8,fp8,0,0.04729439914226532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,float16,0,0.03901279866695404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,fp8,0,0.037041598558425905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,32,128,1,fp8,fp8,0,0.0370959997177124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,float16,0,0.034980800747871396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,fp8,0,0.031615999341011045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,1,128,1,fp8,fp8,0,0.03253760039806366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,float16,0,0.035016000270843506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,fp8,0,0.07736319899559022
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,fp8,0,0.03294239938259125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,2,128,1,fp8,fp8,0,0.03288159966468811
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,float16,0,0.035006400942802426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,fp8,0,0.0329263985157013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,4,128,1,fp8,fp8,0,0.032924801111221313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,float16,0,0.03521760106086731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,fp8,0,0.03295519948005676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,32,8,128,1,fp8,fp8,0,0.032892799377441405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,float16,0,0.05005919933319092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,float16,0,1.098470401763916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,fp8,0,1.088696002960205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,1,128,1,fp8,fp8,0,1.091539192199707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,float16,0,1.1104703903198243
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,fp8,0,1.0892463684082032
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,2,128,1,fp8,fp8,0,1.09335355758667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,float16,0,1.157806396484375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,fp8,0,1.0970383644104005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,4,128,1,fp8,fp8,0,1.0925071716308594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,float16,0,1.2656047821044922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,float16,0,0.8904175758361816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,fp8,0,1.1000608444213866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,fp8,0,0.7725552082061767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,32,8,128,1,fp8,fp8,0,1.1010095596313476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,float16,0,0.5570896148681641
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,1,128,1,fp8,fp8,0,0.5530064105987549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,32,128,1,fp8,fp8,0,0.7685455799102783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,float16,0,0.5613423824310303
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,fp8,0,0.5532559871673584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,2,128,1,fp8,fp8,0,0.5532576084136963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,float16,0,0.5882736206054687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,4,128,1,fp8,fp8,0,0.5541024208068848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,float16,0,0.6327712059020996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,fp8,0,0.5579343795776367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,8,128,1,fp8,fp8,0,0.5576128005981446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,fp8,0,0.3928175926208496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,fp8,0,0.5537744045257569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,32,128,1,fp8,fp8,0,0.39302399158477785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,float16,0,0.2907104015350342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,fp8,0,0.28362560272216797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,1,128,1,fp8,fp8,0,0.2842864036560059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,float16,0,0.2889024019241333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,fp8,0,0.28471519947052004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,fp8,0,0.5544528007507324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,2,128,1,fp8,fp8,0,0.2836143970489502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,float16,0,0.30574080944061277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,fp8,0,0.28471999168395995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,4,128,1,fp8,fp8,0,0.28454079627990725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,float16,0,0.3284064054489136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,fp8,0,0.28707520961761473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,8,128,1,fp8,fp8,0,0.28663361072540283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,float16,0,0.23722081184387206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,fp8,0,0.2042912006378174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,32,128,1,fp8,fp8,0,0.20423998832702636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,float16,0,0.15370880365371703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,fp8,0,0.1496384024620056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,float16,0,0.4531951904296875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,1,128,1,fp8,fp8,0,0.14853919744491578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,float16,0,0.1552016019821167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,2,128,1,fp8,fp8,0,0.14933760166168214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,fp8,0,0.1498255968093872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,4,128,1,fp8,fp8,0,0.14973440170288085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,float16,0,0.17329599857330322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,fp8,0,0.1506592035293579
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,8,128,1,fp8,fp8,0,0.1510655999183655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,float16,0,0.12653919458389282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,fp8,0,0.11082719564437866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,32,128,1,fp8,fp8,0,0.1108847975730896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,float16,0,0.0858784019947052
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,fp8,0,0.08116160035133362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,1,128,1,fp8,fp8,0,0.0809391975402832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,float16,0,0.08512799739837647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,fp8,0,0.08105760216712951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,2,128,1,fp8,fp8,0,0.08139520287513732
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,float16,0,0.08813760280609131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,fp8,0,0.08209279775619507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,4,128,1,fp8,fp8,0,0.0819263994693756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,float16,0,0.09347839951515198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,fp8,0,0.08238080143928528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,32,8,128,1,fp8,fp8,0,0.08225759863853455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,float16,0,0.07197279930114746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,fp8,0,0.061945599317550656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,32,128,1,fp8,fp8,0,0.061799997091293336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,float16,0,0.049430400133132935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,fp8,0,0.047393599152565004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,1,128,1,fp8,fp8,0,0.0474047988653183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,float16,0,0.049384000897407535
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,fp8,0,0.0473008006811142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,fp8,0,0.1495776057243347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,2,128,1,fp8,fp8,0,0.04718399941921234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,float16,0,0.1594591975212097
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,float16,0,0.0506816029548645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,fp8,0,0.04719200134277344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,4,128,1,fp8,fp8,0,0.04718720018863678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,float16,0,0.05345119833946228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,fp8,0,0.04743359982967377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,32,8,128,1,fp8,fp8,0,0.04733920097351074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,float16,0,0.04056319892406464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,fp8,0,0.03715839982032776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,32,128,1,fp8,fp8,0,0.037003201246261594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,float16,0,0.03313280045986176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,fp8,0,0.030895999073982237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,1,128,1,fp8,fp8,0,0.031011199951171874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,float16,0,0.03295679986476898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,fp8,0,0.030924800038337707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,2,128,1,fp8,fp8,0,0.03089439868927002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,float16,0,0.03297280073165894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,fp8,0,0.0310479998588562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,4,128,1,fp8,fp8,0,0.03089439868927002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,float16,0,0.033662399649620055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,fp8,0,0.030987200140953065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,32,8,128,1,fp8,fp8,0,0.030884799361228944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,float16,0,0.022726400196552275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,fp8,0,0.023228800296783446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,float16,0,0.020640000700950623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,fp8,0,0.020710399746894835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,1,128,1,fp8,fp8,0,0.020659199357032774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,float16,0,0.0208079993724823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,fp8,0,0.020665599405765532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,2,128,1,fp8,fp8,0,0.020640000700950623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,float16,0,0.020761600136756896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,fp8,0,0.020787200331687926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,4,128,1,fp8,fp8,0,0.020656000077724456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,float16,0,0.022724799811840057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,fp8,0,0.020688000321388244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,8,128,1,fp8,fp8,0,0.020667199790477753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,1,128,1,float16,float16,0,0.8448543548583984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,1,128,1,float16,fp8,0,0.8583711624145508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,1,128,1,fp8,fp8,0,0.859921646118164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,2,128,1,float16,float16,0,0.848259162902832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,32,32,128,1,fp8,fp8,0,0.02285120040178299
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,2,128,1,float16,fp8,0,0.8597040176391602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,2,128,1,fp8,fp8,0,0.8583583831787109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,4,128,1,float16,float16,0,0.8899439811706543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,4,128,1,float16,fp8,0,0.8616527557373047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,8,128,1,float16,float16,0,0.9713744163513184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,8,128,1,float16,fp8,0,0.8630000114440918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,8,128,1,fp8,fp8,0,0.8649375915527344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,32,128,1,float16,fp8,0,0.6425856113433838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,32,128,1,fp8,fp8,0,0.6444928169250488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,1,128,1,float16,float16,0,0.4285103797912598
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,1,128,1,float16,fp8,0,0.43704957962036134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,1,128,1,fp8,fp8,0,0.436246395111084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,2,128,1,float16,float16,0,0.43387517929077146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,2,128,1,float16,fp8,0,0.43625922203063966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,32,4,128,1,fp8,fp8,0,0.8603615760803223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,2,128,1,fp8,fp8,0,0.4373055934906006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,4,128,1,float16,float16,0,0.4530943870544434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,4,128,1,float16,fp8,0,0.4365583896636963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,4,128,1,fp8,fp8,0,0.43709120750427244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,8,128,1,float16,float16,0,0.49338560104370116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,32,128,1,float16,float16,0,0.738321590423584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,8,128,1,float16,fp8,0,0.4380159854888916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,32,8,128,1,fp8,fp8,0,0.43916001319885256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,32,128,1,float16,float16,0,0.3756511926651001
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,32,128,1,float16,fp8,0,0.3277872085571289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,32,128,1,fp8,fp8,0,0.32832159996032717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,1,128,1,float16,float16,0,0.22359519004821776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,1,128,1,float16,fp8,0,0.22369120121002198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,2,128,1,float16,float16,0,0.22373600006103517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,2,128,1,float16,fp8,0,0.2253200054168701
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,2,128,1,fp8,fp8,0,0.22393279075622557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,4,128,1,float16,float16,0,0.23444321155548095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,4,128,1,fp8,fp8,0,0.22436800003051757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,8,128,1,float16,float16,0,0.25416638851165774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,8,128,1,float16,fp8,0,0.22538719177246094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,8,128,1,fp8,fp8,0,0.22552158832550048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,32,128,1,float16,float16,0,0.19485759735107422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,32,128,1,float16,fp8,0,0.17098720073699952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,1,128,1,float16,float16,0,0.11831840276718139
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,1,128,1,float16,fp8,0,0.11762880086898804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,1,128,1,fp8,fp8,0,0.11805440187454223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,2,128,1,float16,float16,0,0.11906559467315674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,2,128,1,float16,fp8,0,0.11822719573974609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,2,128,1,fp8,fp8,0,0.11871839761734009
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,4,128,1,float16,float16,0,0.12376799583435058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,4,128,1,float16,fp8,0,0.11868640184402465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,4,128,1,fp8,fp8,0,0.11885600090026856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,8,128,1,float16,float16,0,0.13396639823913575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,8,128,1,float16,fp8,0,0.11901600360870361
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,8,128,1,fp8,fp8,0,0.11901279687881469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,4,128,1,float16,fp8,0,0.22543199062347413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,32,128,1,float16,float16,0,0.10478559732437134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,32,128,1,float16,fp8,0,0.09240319728851318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,32,128,1,fp8,fp8,0,0.09231200218200683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,1,128,1,float16,float16,0,0.06668639779090882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,32,32,128,1,fp8,fp8,0,0.1703312039375305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,1,128,1,float16,fp8,0,0.06372479796409607
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,1,128,1,fp8,fp8,0,0.0638047993183136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,2,128,1,float16,float16,0,0.06724960207939149
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,2,128,1,float16,fp8,0,0.06387360095977783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,2,128,1,fp8,fp8,0,0.06458399891853332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,4,128,1,float16,float16,0,0.06860960125923157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,4,128,1,float16,fp8,0,0.06547840237617493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,4,128,1,fp8,fp8,0,0.0644864022731781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,8,128,1,float16,float16,0,0.07450399994850158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,8,128,1,float16,fp8,0,0.06571040153503419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,32,8,128,1,fp8,fp8,0,0.0659168004989624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,32,128,1,float16,float16,0,0.05932639837265015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,32,128,1,float16,fp8,0,0.05267999768257141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,32,128,1,fp8,fp8,0,0.05169919729232788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,1,128,1,float16,float16,0,0.03916159868240356
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,1,128,1,float16,fp8,0,0.03727999925613403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,1,128,1,fp8,fp8,0,0.037215998768806456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,2,128,1,float16,float16,0,0.03902080059051514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,2,128,1,float16,fp8,0,0.037143999338150026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,2,128,1,fp8,fp8,0,0.03707199990749359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,4,128,1,float16,float16,0,0.03946720063686371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,4,128,1,float16,fp8,0,0.03705120086669922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,32,1,128,1,fp8,fp8,0,0.22384319305419922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,4,128,1,fp8,fp8,0,0.03728959858417511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,8,128,1,float16,fp8,0,0.03714079856872558
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,8,128,1,fp8,fp8,0,0.03764640092849732
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,32,128,1,float16,float16,0,0.033024001121521
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,32,128,1,float16,fp8,0,0.030931198596954347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,32,128,1,fp8,fp8,0,0.030851200222969055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,1,128,1,float16,fp8,0,0.024827200174331664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,1,128,1,fp8,fp8,0,0.02470400035381317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,2,128,1,float16,float16,0,0.02494879961013794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,2,128,1,float16,fp8,0,0.024753600358963013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,2,128,1,fp8,fp8,0,0.024691200256347655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,4,128,1,float16,float16,0,0.025022399425506592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,4,128,1,float16,fp8,0,0.0247296005487442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,4,128,1,fp8,fp8,0,0.024780799448490144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,8,128,1,float16,float16,0,0.026892799139022826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,8,128,1,float16,fp8,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,8,128,1,fp8,fp8,0,0.024772800505161285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,32,128,1,float16,float16,0,0.018862399458885192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,32,128,1,float16,fp8,0,0.02067359983921051
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,32,128,1,fp8,fp8,0,0.020529599487781526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,1,128,1,float16,float16,0,0.01661120057106018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,1,128,1,float16,fp8,0,0.01658719927072525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,1,128,1,fp8,fp8,0,0.016598400473594666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,2,128,1,float16,float16,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,2,128,1,float16,fp8,0,0.016521599888801575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,4,128,1,float16,float16,0,0.01674720048904419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,2,128,1,fp8,fp8,0,0.016646400094032288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,32,8,128,1,float16,float16,0,0.043166399002075195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,4,128,1,fp8,fp8,0,0.01671359986066818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,8,128,1,float16,float16,0,0.016622400283813475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,8,128,1,fp8,fp8,0,0.01661120057106018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,32,128,1,float16,float16,0,0.016582399606704712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,32,1,128,1,float16,float16,0,0.025212800502777098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,32,128,1,float16,fp8,0,0.01663679927587509
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,32,128,1,fp8,fp8,0,0.016795200109481812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,1,128,1,float16,float16,0,0.01478080004453659
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,1,128,1,float16,fp8,0,0.014616000652313232
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,1,128,1,fp8,fp8,0,0.014499199390411378
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,2,128,1,float16,float16,0,0.016545599699020384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,2,128,1,float16,fp8,0,0.014505599439144135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,2,128,1,fp8,fp8,0,0.014633600413799287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,4,128,1,float16,float16,0,0.015732799470424653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,4,128,1,float16,fp8,0,0.014646400511264802
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,4,128,1,fp8,fp8,0,0.014595200121402741
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,8,128,1,float16,float16,0,0.016519999504089354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,8,128,1,float16,fp8,0,0.014534400403499603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,32,8,128,1,fp8,fp8,0,0.014595200121402741
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,1,128,1,float16,float16,0,0.35225439071655273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,1,128,1,float16,fp8,0,0.3690783977508545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,1,128,1,fp8,fp8,0,0.36884000301361086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,2,128,1,float16,float16,0,0.355294394493103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,2,128,1,float16,fp8,0,0.36875998973846436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,2,128,1,fp8,fp8,0,0.3692847967147827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,4,128,1,float16,float16,0,0.373854398727417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,4,128,1,float16,fp8,0,0.3690272092819214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,4,128,1,float16,fp8,0,0.016550399363040924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,32,8,128,1,float16,fp8,0,0.016804799437522888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,4,128,1,fp8,fp8,0,0.3685695886611938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,8,128,1,float16,float16,0,0.4143375873565674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,8,128,1,float16,fp8,0,0.3689039945602417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,32,128,1,float16,float16,0,0.3361439943313599
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,32,128,1,float16,fp8,0,0.29063680171966555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,32,128,1,fp8,fp8,0,0.2899391889572144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,1,128,1,float16,float16,0,0.18269920349121094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,1,128,1,float16,fp8,0,0.19000320434570311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,1,128,1,fp8,fp8,0,0.1887935996055603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,2,128,1,float16,float16,0,0.18265440464019775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,2,128,1,float16,fp8,0,0.1896880030632019
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,2,128,1,fp8,fp8,0,0.18894399404525758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,4,128,1,float16,float16,0,0.19328800439834595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,4,128,1,float16,fp8,0,0.19062399864196777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,4,128,1,fp8,fp8,0,0.18896640539169313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,8,128,1,float16,float16,0,0.21290879249572753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,8,128,1,float16,fp8,0,0.18997440338134766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,32,8,128,1,fp8,fp8,0,0.1898784041404724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,32,128,1,float16,float16,0,0.17656480073928832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,32,128,1,float16,fp8,0,0.1517359972000122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,32,128,1,fp8,fp8,0,0.15171680450439454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,1,128,1,float16,fp8,0,0.10061119794845581
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,1,128,1,fp8,fp8,0,0.10064640045166015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,2,128,1,float16,float16,0,0.0998528003692627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,2,128,1,float16,fp8,0,0.1005519986152649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,2,128,1,fp8,fp8,0,0.10057599544525146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,4,128,1,float16,float16,0,0.1047584056854248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,32,8,128,1,fp8,fp8,0,0.3689840078353882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,4,128,1,float16,fp8,0,0.10087519884109497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,4,128,1,fp8,fp8,0,0.10063519477844238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,8,128,1,float16,float16,0,0.11374080181121826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,8,128,1,float16,fp8,0,0.10067039728164673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,8,128,1,fp8,fp8,0,0.1008255958557129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,32,128,1,float16,float16,0,0.09444800019264221
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,32,128,1,float16,fp8,0,0.08052800297737121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,32,128,1,fp8,fp8,0,0.08115199804306031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,1,128,1,float16,float16,0,0.05539839863777161
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,1,128,1,float16,fp8,0,0.053467202186584475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,1,128,1,fp8,fp8,0,0.05351200103759766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,2,128,1,float16,float16,0,0.05469920039176941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,2,128,1,float16,fp8,0,0.0535968005657196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,2,128,1,fp8,fp8,0,0.05353599786758423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,4,128,1,float16,float16,0,0.057571202516555786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,4,128,1,float16,fp8,0,0.05413280129432678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,4,128,1,fp8,fp8,0,0.05400320291519165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,8,128,1,float16,float16,0,0.061643201112747195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,8,128,1,float16,fp8,0,0.05542880296707153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,32,8,128,1,fp8,fp8,0,0.05535680055618286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,32,1,128,1,float16,float16,0,0.09890080094337464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,32,128,1,float16,float16,0,0.053723198175430295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,32,128,1,float16,fp8,0,0.04727199971675873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,32,128,1,fp8,fp8,0,0.047328001260757445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,1,128,1,float16,float16,0,0.033000001311302186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,1,128,1,float16,fp8,0,0.032953599095344545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,1,128,1,fp8,fp8,0,0.03299840092658997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,2,128,1,float16,fp8,0,0.032953599095344545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,2,128,1,fp8,fp8,0,0.03294560015201568
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,4,128,1,float16,float16,0,0.03377279937267304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,4,128,1,float16,fp8,0,0.03296000063419342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,4,128,1,fp8,fp8,0,0.032948800921440126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,8,128,1,float16,float16,0,0.03715839982032776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,8,128,1,fp8,fp8,0,0.03295519948005676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,32,128,1,float16,float16,0,0.026804798841476442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,32,128,1,float16,fp8,0,0.026782399415969847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,32,128,1,fp8,fp8,0,0.02686559855937958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,1,128,1,float16,float16,0,0.020681600272655486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,1,128,1,float16,fp8,0,0.020694400370121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,1,128,1,fp8,fp8,0,0.02067680060863495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,2,128,1,float16,float16,0,0.021035200357437132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,2,128,1,float16,fp8,0,0.020670400559902193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,2,128,1,fp8,fp8,0,0.02072319984436035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,4,128,1,float16,fp8,0,0.020615999400615693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,4,128,1,fp8,fp8,0,0.020707200467586517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,8,128,1,float16,float16,0,0.022668799757957457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,8,128,1,float16,fp8,0,0.020708799362182617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,8,128,1,fp8,fp8,0,0.020588800311088562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,32,128,1,float16,float16,0,0.016735999286174773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,32,128,1,float16,fp8,0,0.01671839952468872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,32,128,1,fp8,fp8,0,0.017769600450992584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,1,128,1,float16,float16,0,0.0147024005651474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,1,128,1,float16,fp8,0,0.01464959979057312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,1,128,1,fp8,fp8,0,0.0146479994058609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,2,128,1,float16,float16,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,2,128,1,float16,fp8,0,0.014443199336528777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,2,128,1,fp8,fp8,0,0.014468799531459808
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,4,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,4,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,8,128,1,float16,fp8,0,0.03299359977245331
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,4,128,1,fp8,fp8,0,0.014449599385261535
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,8,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,8,128,1,float16,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,32,8,128,1,fp8,fp8,0,0.014510400593280792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,32,128,1,float16,float16,0,0.014561599493026734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,32,128,1,float16,fp8,0,0.014511999487876893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,32,128,1,fp8,fp8,0,0.014694400131702423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,1,128,1,float16,float16,0,0.012558400630950928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,1,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,1,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,2,128,1,float16,float16,0,0.014545600116252898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,2,128,1,float16,fp8,0,0.01247519999742508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,2,128,1,fp8,fp8,0,0.013150399923324585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,4,128,1,float16,float16,0,0.012443199753761292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,4,128,1,float16,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,32,4,128,1,float16,float16,0,0.020659199357032774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,4,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,8,128,1,float16,fp8,0,0.012462399899959564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,8,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,32,128,1,float16,float16,0,0.013889600336551667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,32,128,1,float16,fp8,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,32,128,1,fp8,fp8,0,0.01255040019750595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,1,128,1,float16,float16,0,0.012534399330615998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,1,128,1,float16,fp8,0,0.012459199875593185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,1,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,2,128,1,float16,float16,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,2,128,1,float16,fp8,0,0.012441600114107132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,2,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,4,128,1,float16,float16,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,4,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,4,128,1,fp8,fp8,0,0.012451200187206269
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,8,128,1,float16,float16,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,8,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,32,8,128,1,fp8,fp8,0,0.012803199887275695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,32,2,128,1,float16,float16,0,0.03304319977760315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,1,128,1,float16,float16,0,0.21748800277709962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,1,128,1,float16,fp8,0,0.22607359886169434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,1,128,1,fp8,fp8,0,0.22686560153961183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,2,128,1,float16,fp8,0,0.2262336015701294
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,2,128,1,fp8,fp8,0,0.22610559463500976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,32,8,128,1,float16,float16,0,0.014403200149536133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,4,128,1,float16,fp8,0,0.22646241188049315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,4,128,1,fp8,fp8,0,0.22626879215240478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,8,128,1,float16,float16,0,0.24530720710754395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,8,128,1,float16,fp8,0,0.2269887924194336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,8,128,1,fp8,fp8,0,0.2270751953125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,32,128,1,float16,float16,0,0.18422880172729492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,2,128,1,float16,float16,0,0.21778879165649415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,32,128,1,float16,fp8,0,0.16826080083847045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,32,128,1,fp8,fp8,0,0.16776640415191652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,1,128,1,float16,float16,0,0.11503839492797852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,1,128,1,float16,fp8,0,0.11741280555725098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,1,128,1,fp8,fp8,0,0.11862720251083374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,2,128,1,float16,float16,0,0.11516640186309815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,2,128,1,float16,fp8,0,0.11902079582214356
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,2,128,1,fp8,fp8,0,0.11715840101242066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,32,4,128,1,float16,float16,0,0.22607040405273438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,4,128,1,float16,fp8,0,0.11739039421081543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,4,128,1,float16,float16,0,0.11923680305480958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,4,128,1,fp8,fp8,0,0.11726880073547363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,8,128,1,float16,float16,0,0.12751519680023193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,8,128,1,float16,fp8,0,0.11840640306472779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,32,8,128,1,fp8,fp8,0,0.11713600158691406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,32,128,1,float16,fp8,0,0.09020959734916686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,32,128,1,fp8,fp8,0,0.09031519889831544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,1,128,1,float16,float16,0,0.06366239786148072
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,1,128,1,float16,fp8,0,0.06368319988250733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,1,128,1,fp8,fp8,0,0.06367999911308289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,2,128,1,float16,float16,0,0.0636575996875763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,2,128,1,float16,fp8,0,0.06363679766654969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,2,128,1,fp8,fp8,0,0.06383360028266907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,4,128,1,float16,float16,0,0.06582239866256714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,4,128,1,float16,fp8,0,0.06367200016975402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,8,128,1,float16,float16,0,0.07038080096244811
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,8,128,1,float16,fp8,0,0.06397119760513306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,8,128,1,fp8,fp8,0,0.063755202293396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,32,128,1,float16,float16,0,0.05292320251464844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,32,128,1,float16,fp8,0,0.04936160147190094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,32,128,1,fp8,fp8,0,0.049348801374435425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,1,128,1,float16,float16,0,0.034985598921775815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,1,128,1,float16,fp8,0,0.035041600465774536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,1,128,1,fp8,fp8,0,0.03494240045547485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,2,128,1,float16,float16,0,0.0349727988243103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,2,128,1,float16,fp8,0,0.034990400075912476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,2,128,1,fp8,fp8,0,0.035025599598884585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,4,128,1,float16,float16,0,0.03513120114803314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,4,128,1,float16,fp8,0,0.034995201230049136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,4,128,1,fp8,fp8,0,0.035094401240348815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,8,128,1,float16,float16,0,0.03908959925174713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,8,128,1,float16,fp8,0,0.034964799880981445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,32,8,128,1,fp8,fp8,0,0.034980800747871396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,32,128,1,float16,float16,0,0.03089439868927002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,32,128,1,float16,fp8,0,0.02889440059661865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,32,128,1,fp8,fp8,0,0.02887359857559204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,1,128,1,float16,float16,0,0.022724799811840057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,1,128,1,float16,fp8,0,0.02268960028886795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,1,128,1,fp8,fp8,0,0.02273920029401779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,2,128,1,float16,float16,0,0.02268480062484741
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,2,128,1,float16,fp8,0,0.022753599286079406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,2,128,1,fp8,fp8,0,0.02272160053253174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,4,128,1,float16,float16,0,0.0227183997631073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,4,128,1,float16,fp8,0,0.02269600033760071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,4,128,1,fp8,fp8,0,0.022676800191402436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,8,128,1,float16,float16,0,0.0247311994433403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,8,128,1,float16,fp8,0,0.022726400196552275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,32,8,128,1,fp8,fp8,0,0.02266400009393692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,32,128,1,float16,float16,0,0.01666879951953888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,32,128,1,float16,fp8,0,0.018606400489807128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,32,128,1,float16,float16,0,0.0965503990650177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,32,128,1,fp8,fp8,0,0.01863359957933426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,1,128,1,float16,float16,0,0.01454399973154068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,1,128,1,float16,fp8,0,0.014703999459743499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,2,128,1,float16,float16,0,0.014504000544548035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,2,128,1,float16,fp8,0,0.014788800477981567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,2,128,1,fp8,fp8,0,0.014508800208568573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,4,128,1,float16,float16,0,0.014692799746990204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,4,128,1,float16,fp8,0,0.0146479994058609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,4,128,1,fp8,fp8,0,0.014612799882888794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,8,128,1,float16,float16,0,0.016607999801635742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,8,128,1,float16,fp8,0,0.014611199498176575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,32,4,128,1,fp8,fp8,0,0.06383519768714904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,8,128,1,fp8,fp8,0,0.014604799449443817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,32,128,1,float16,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,32,128,1,fp8,fp8,0,0.012582400441169738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,1,128,1,float16,float16,0,0.012380799651145935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,1,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,1,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,2,128,1,float16,float16,0,0.010760000348091126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,2,128,1,float16,fp8,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,2,128,1,fp8,fp8,0,0.010609599947929382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,4,128,1,float16,float16,0,0.012374400347471236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,4,128,1,float16,fp8,0,0.010599999874830245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,4,128,1,fp8,fp8,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,8,128,1,float16,float16,0,0.012436799705028534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,8,128,1,float16,fp8,0,0.012328000366687774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,8,128,1,fp8,fp8,0,0.012382400035858155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,32,128,1,float16,float16,0,0.012399999797344208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,32,128,1,float16,fp8,0,0.012368000298738479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,32,128,1,fp8,fp8,0,0.012380799651145935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,1,128,1,float16,float16,0,0.010611200332641601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,1,128,1,float16,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,1,128,1,fp8,fp8,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,2,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,2,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,32,1,128,1,fp8,fp8,0,0.014608000218868256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,2,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,4,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,4,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,4,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,8,128,1,float16,float16,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,8,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,32,8,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,32,32,128,1,float16,float16,0,0.012606400251388549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,32,128,1,float16,fp8,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,1,128,1,float16,float16,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,1,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,1,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,2,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,2,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,2,128,1,fp8,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,4,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,4,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,4,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,8,128,1,float16,float16,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,8,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,8,128,1,fp8,fp8,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,1,128,1,float16,float16,0,0.18202240467071534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,1,128,1,float16,fp8,0,0.18091679811477662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,1,128,1,fp8,fp8,0,0.18126720190048218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,2,128,1,float16,float16,0,0.18229759931564332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,2,128,1,float16,fp8,0,0.18065760135650635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,2,128,1,fp8,fp8,0,0.18065439462661742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,4,128,1,float16,float16,0,0.18598719835281372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,32,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,32,32,128,1,fp8,fp8,0,0.010628800094127654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,4,128,1,fp8,fp8,0,0.18053439855575562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,8,128,1,float16,float16,0,0.19424320459365846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,8,128,1,fp8,fp8,0,0.18060319423675536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,8,128,1,float16,fp8,0,0.18046879768371582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,32,128,1,float16,float16,0,0.12731679677963256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,1,128,1,float16,float16,0,0.09670720100402833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,32,128,1,fp8,fp8,0,0.11922399997711182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,1,128,1,float16,fp8,0,0.09445440173149108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,1,128,1,fp8,fp8,0,0.09446560144424439
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,2,128,1,float16,float16,0,0.09652159810066223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,2,128,1,float16,fp8,0,0.09440960288047791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,2,128,1,fp8,fp8,0,0.09447360038757324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,4,128,1,float16,float16,0,0.09869120121002198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,4,128,1,float16,fp8,0,0.09438239932060241
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,4,128,1,fp8,fp8,0,0.09447680115699768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,8,128,1,float16,float16,0,0.10313600301742554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,8,128,1,float16,fp8,0,0.09452800154685974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,8,128,1,fp8,fp8,0,0.0943664014339447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,32,128,1,float16,float16,0,0.06895040273666382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,32,128,1,float16,fp8,0,0.06574079990386963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,32,128,1,fp8,fp8,0,0.06569280028343201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,1,128,1,float16,float16,0,0.051444798707962036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,1,128,1,float16,fp8,0,0.05135999917984009
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,1,128,1,fp8,fp8,0,0.05145919919013977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,2,128,1,float16,float16,0,0.05169919729232788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,2,128,1,float16,fp8,0,0.05130559802055359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,2,128,1,fp8,fp8,0,0.05141760110855102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,4,128,1,float16,float16,0,0.053465598821640016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,4,128,1,float16,fp8,0,0.0514240026473999
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,4,128,1,fp8,fp8,0,0.05141440033912659
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,8,128,1,float16,float16,0,0.05745599865913391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,32,4,128,1,float16,fp8,0,0.18052480220794678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,8,128,1,float16,fp8,0,0.05140799880027771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,32,8,128,1,fp8,fp8,0,0.05139840245246887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,32,128,1,float16,fp8,0,0.03501279950141907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,32,128,1,fp8,fp8,0,0.035158398747444156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,1,128,1,float16,float16,0,0.030031999945640563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,1,128,1,float16,fp8,0,0.028934401273727418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,2,128,1,float16,float16,0,0.030752000212669373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,2,128,1,float16,fp8,0,0.02893120050430298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,32,32,128,1,float16,fp8,0,0.11913440227508545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,2,128,1,fp8,fp8,0,0.02895520031452179
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,4,128,1,float16,float16,0,0.030905601382255555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,4,128,1,float16,fp8,0,0.02897599935531616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,4,128,1,fp8,fp8,0,0.0288239985704422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,8,128,1,float16,float16,0,0.030985599756240843
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,8,128,1,float16,fp8,0,0.02890399992465973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,8,128,1,fp8,fp8,0,0.02886880040168762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,32,128,1,float16,float16,0,0.022702400386333466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,32,128,1,float16,fp8,0,0.02272319942712784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,32,128,1,fp8,fp8,0,0.02269120067358017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,1,128,1,float16,float16,0,0.02062080055475235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,1,128,1,float16,fp8,0,0.01977120041847229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,1,128,1,fp8,fp8,0,0.019622400403022766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,2,128,1,float16,float16,0,0.020577600598335265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,2,128,1,float16,fp8,0,0.019571200013160706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,2,128,1,fp8,fp8,0,0.019780799746513367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,4,128,1,float16,float16,0,0.02062560021877289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,4,128,1,float16,fp8,0,0.01879200041294098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,4,128,1,fp8,fp8,0,0.01873439997434616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,8,128,1,float16,float16,0,0.020764799416065217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,8,128,1,float16,fp8,0,0.01876640021800995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,32,8,128,1,fp8,fp8,0,0.0200080007314682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,32,128,1,float16,float16,0,0.037636798620223996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,32,128,1,float16,float16,0,0.015387199819087982
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,32,128,1,float16,fp8,0,0.014619199931621552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,32,128,1,fp8,fp8,0,0.014641599357128143
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,1,128,1,float16,float16,0,0.01451359987258911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,1,128,1,float16,fp8,0,0.014414399862289429
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,1,128,1,fp8,fp8,0,0.014484800398349762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,2,128,1,float16,float16,0,0.014404800534248353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,2,128,1,float16,fp8,0,0.014433600008487701
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,2,128,1,fp8,fp8,0,0.013596799969673157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,4,128,1,float16,float16,0,0.014508800208568573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,4,128,1,float16,fp8,0,0.01436000019311905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,4,128,1,fp8,fp8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,8,128,1,float16,float16,0,0.014448000490665436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,8,128,1,float16,fp8,0,0.014446400105953217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,32,8,128,1,fp8,fp8,0,0.014187200367450714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,32,128,1,float16,float16,0,0.012379200011491776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,32,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,32,128,1,fp8,fp8,0,0.012404800206422806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,1,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,1,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,2,128,1,float16,float16,0,0.010583999752998351
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,2,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,2,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,4,128,1,float16,float16,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,4,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,4,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,8,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,8,128,1,float16,fp8,0,0.010593599826097488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,8,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,32,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,32,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,32,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,1,128,1,float16,float16,0,0.010302399843931198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,1,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,32,1,128,1,fp8,fp8,0,0.028839999437332155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,1,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,2,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,2,128,1,float16,fp8,0,0.01032319962978363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,2,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,4,128,1,float16,fp8,0,0.010308799892663955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,4,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,8,128,1,float16,float16,0,0.010920000076293946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,8,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,8,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,32,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,32,128,1,float16,fp8,0,0.010883200168609618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,32,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,1,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,1,128,1,float16,fp8,0,0.00952960029244423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,32,1,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,1,128,1,fp8,fp8,0,0.010313600301742554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,2,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,2,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,2,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,4,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,4,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,8,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,8,128,1,fp8,fp8,0,0.010345599800348281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,8,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,1,128,1,float16,float16,0,0.15920640230178834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,1,128,1,fp8,fp8,0,0.15384639501571656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,2,128,1,float16,float16,0,0.1598080039024353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,2,128,1,float16,fp8,0,0.15388959646224976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,2,128,1,fp8,fp8,0,0.1538416028022766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,4,128,1,float16,float16,0,0.16136319637298585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,4,128,1,float16,fp8,0,0.1535264015197754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,4,128,1,fp8,fp8,0,0.15372320413589477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,32,4,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,8,128,1,float16,float16,0,0.16539520025253296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,8,128,1,float16,fp8,0,0.15375039577484131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,8,128,1,fp8,fp8,0,0.15385760068893434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,32,128,1,float16,fp8,0,0.09440640211105347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,32,128,1,fp8,fp8,0,0.09439839720726013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,32,4,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,1,128,1,float16,float16,0,0.08418880105018615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,1,128,1,float16,fp8,0,0.08015679717063903
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,32,1,128,1,float16,fp8,0,0.15383520126342773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,2,128,1,float16,fp8,0,0.08022239804267883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,2,128,1,fp8,fp8,0,0.08022720217704774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,4,128,1,float16,float16,0,0.08624160289764404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,4,128,1,float16,fp8,0,0.08021759986877441
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,4,128,1,fp8,fp8,0,0.08025439977645873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,8,128,1,float16,float16,0,0.08826720118522643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,8,128,1,float16,fp8,0,0.08006880283355713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,8,128,1,fp8,fp8,0,0.08012800216674805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,32,128,1,float16,float16,0,0.05520960092544556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,32,128,1,float16,fp8,0,0.050860798358917235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,32,128,1,float16,float16,0,0.10001920461654663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,32,128,1,fp8,fp8,0,0.05140320062637329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,1,128,1,float16,float16,0,0.04728319942951202
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,1,128,1,float16,fp8,0,0.04520959854125976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,1,128,1,fp8,fp8,0,0.04526079893112182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,1,128,1,fp8,fp8,0,0.08004639744758606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,2,128,1,float16,float16,0,0.04710719883441925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,2,128,1,float16,fp8,0,0.04528000056743622
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,2,128,1,fp8,fp8,0,0.04523679912090302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,4,128,1,float16,float16,0,0.04733439981937408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,4,128,1,float16,fp8,0,0.04509440064430237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,4,128,1,fp8,fp8,0,0.04519839882850647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,8,128,1,float16,float16,0,0.04732959866523743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,8,128,1,float16,fp8,0,0.04514400064945221
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,32,8,128,1,fp8,fp8,0,0.04517279863357544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,32,128,1,float16,float16,0,0.028966400027275085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,32,128,1,float16,fp8,0,0.028883200883865357
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,32,128,1,fp8,fp8,0,0.028889599442481994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,1,128,1,float16,float16,0,0.02688480019569397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,1,128,1,float16,fp8,0,0.02683520019054413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,1,128,1,fp8,fp8,0,0.026825600862503053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,2,128,1,float16,float16,0,0.02699039876461029
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,2,128,1,float16,fp8,0,0.026767998933792114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,2,128,1,fp8,fp8,0,0.02677919864654541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,4,128,1,float16,float16,0,0.02885279953479767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,4,128,1,float16,fp8,0,0.026752001047134398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,4,128,1,fp8,fp8,0,0.0268095999956131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,8,128,1,float16,float16,0,0.02884800136089325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,8,128,1,float16,fp8,0,0.026825600862503053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,32,8,128,1,fp8,fp8,0,0.026817598938941957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,32,128,1,float16,float16,0,0.02014880031347275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,32,128,1,float16,fp8,0,0.018643200397491455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,32,128,1,fp8,fp8,0,0.018713599443435668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,1,128,1,float16,float16,0,0.0186256006360054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,1,128,1,float16,fp8,0,0.017668800055980684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,1,128,1,fp8,fp8,0,0.017718400061130523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,2,128,1,float16,float16,0,0.018607999384403228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,2,128,1,fp8,fp8,0,0.018296000361442567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,4,128,1,float16,float16,0,0.018622399866580965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,4,128,1,float16,fp8,0,0.01767359972000122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,4,128,1,fp8,fp8,0,0.01801439970731735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,8,128,1,float16,float16,0,0.018641600012779237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,8,128,1,float16,fp8,0,0.018615999817848207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,32,2,128,1,float16,float16,0,0.08418560028076172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,8,128,1,fp8,fp8,0,0.01855199933052063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,32,128,1,float16,float16,0,0.014612799882888794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,32,128,1,float16,fp8,0,0.014446400105953217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,32,128,1,fp8,fp8,0,0.014422400295734406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,1,128,1,float16,float16,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,1,128,1,float16,fp8,0,0.012561599910259246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,1,128,1,fp8,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,2,128,1,float16,float16,0,0.012566399574279786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,2,128,1,float16,fp8,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,2,128,1,fp8,fp8,0,0.012591999769210816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,4,128,1,float16,float16,0,0.012876799702644348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,4,128,1,float16,fp8,0,0.012588800489902496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,4,128,1,fp8,fp8,0,0.01242239996790886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,8,128,1,float16,float16,0,0.012630400061607362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,8,128,1,float16,fp8,0,0.012427199631929398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,32,8,128,1,fp8,fp8,0,0.012577599287033081
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,32,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,32,128,1,float16,fp8,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,32,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,1,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,1,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,1,128,1,fp8,fp8,0,0.0106175996363163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,2,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,2,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,4,128,1,float16,float16,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,4,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,4,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,8,128,1,float16,float16,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,8,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,8,128,1,fp8,fp8,0,0.01029760017991066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,32,128,1,float16,fp8,0,0.010358399897813796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,32,2,128,1,float16,fp8,0,0.01799200028181076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,32,128,1,fp8,fp8,0,0.01029760017991066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,1,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,1,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,1,128,1,fp8,fp8,0,0.009070400148630142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,2,128,1,float16,fp8,0,0.009193599969148637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,2,128,1,fp8,fp8,0,0.009427200257778167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,4,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,4,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,4,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,8,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,8,128,1,float16,fp8,0,0.009694399684667588
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,8,128,1,fp8,fp8,0,0.009838400036096573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,32,128,1,float16,float16,0,0.011318399757146835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,32,128,1,float16,fp8,0,0.008377599716186523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,32,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,1,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,1,128,1,float16,fp8,0,0.009796799719333648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,1,128,1,fp8,fp8,0,0.009812799841165542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,2,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,2,128,1,float16,fp8,0,0.009601599723100662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,2,128,1,fp8,fp8,0,0.009822399914264679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,32,2,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,4,128,1,float16,fp8,0,0.009598399698734283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,4,128,1,fp8,fp8,0,0.00960479974746704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,32,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,8,128,1,float16,float16,0,0.009139200299978256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,8,128,1,float16,fp8,0,0.008367999643087386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,8,128,1,fp8,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,32,2,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,1,128,1,float16,float16,0,0.1532688021659851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,1,128,1,float16,fp8,0,0.14358880519866943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,1,128,1,fp8,fp8,0,0.14367200136184693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,2,128,1,float16,float16,0,0.1535871982574463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,2,128,1,float16,fp8,0,0.14360640048980713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,2,128,1,fp8,fp8,0,0.14376000165939332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,4,128,1,float16,float16,0,0.15405280590057374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,4,128,1,float16,fp8,0,0.14386080503463744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,4,128,1,fp8,fp8,0,0.14362239837646484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,8,128,1,float16,float16,0,0.15672800540924073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,8,128,1,float16,fp8,0,0.14368480443954468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,32,8,128,1,fp8,fp8,0,0.14390720129013063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,32,128,1,float16,float16,0,0.08983520269393921
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,32,128,1,float16,fp8,0,0.08251519799232483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,32,128,1,fp8,fp8,0,0.08211680054664612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,1,128,1,float16,float16,0,0.08015679717063903
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,1,128,1,fp8,fp8,0,0.07599520087242126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,32,4,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,2,128,1,float16,float16,0,0.08013920187950134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,2,128,1,float16,fp8,0,0.07607359886169433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,2,128,1,fp8,fp8,0,0.07606239914894104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,4,128,1,float16,float16,0,0.08023200035095215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,4,128,1,float16,fp8,0,0.07608320116996765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,4,128,1,fp8,fp8,0,0.07594239711761475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,8,128,1,float16,float16,0,0.08210560083389282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,8,128,1,float16,fp8,0,0.07598080039024353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,8,128,1,fp8,fp8,0,0.07603679895401001
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,32,128,1,float16,fp8,0,0.04529919922351837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,32,128,1,fp8,fp8,0,0.04532159864902496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,1,128,1,float16,float16,0,0.04532000124454498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,1,128,1,float16,fp8,0,0.04324640035629272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,1,128,1,fp8,fp8,0,0.04317759871482849
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,2,128,1,float16,float16,0,0.04527359902858734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,2,128,1,float16,fp8,0,0.043244799971580504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,2,128,1,fp8,fp8,0,0.04319039881229401
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,4,128,1,float16,float16,0,0.04556640088558197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,4,128,1,fp8,fp8,0,0.043140798807144165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,8,128,1,float16,float16,0,0.045270401239395144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,8,128,1,float16,fp8,0,0.043112000823020934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,32,1,128,1,float16,fp8,0,0.07596319913864136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,8,128,1,fp8,fp8,0,0.04310399889945984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,32,128,1,float16,float16,0,0.028838399052619933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,32,128,1,float16,fp8,0,0.026982399821281432
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,32,128,1,fp8,fp8,0,0.026819199323654175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,1,128,1,float16,float16,0,0.026929599046707154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,1,128,1,float16,fp8,0,0.024846400320529937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,1,128,1,fp8,fp8,0,0.02497600018978119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,2,128,1,float16,float16,0,0.026812800765037538
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,2,128,1,float16,fp8,0,0.02661919891834259
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,4,128,1,float16,float16,0,0.026947200298309326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,4,128,1,float16,fp8,0,0.026576000452041625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,4,128,1,fp8,fp8,0,0.02664639949798584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,32,128,1,float16,float16,0,0.047224000096321106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,8,128,1,float16,float16,0,0.027003198862075806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,8,128,1,float16,fp8,0,0.02476159930229187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,8,128,1,fp8,fp8,0,0.026475200057029726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,32,128,1,float16,float16,0,0.018673600256443025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,32,128,1,float16,fp8,0,0.018603199720382692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,32,128,1,fp8,fp8,0,0.01860000044107437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,1,128,1,float16,float16,0,0.018479999899864197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,1,128,1,float16,fp8,0,0.016684800386428833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,1,128,1,fp8,fp8,0,0.01669120043516159
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,2,128,1,float16,float16,0,0.018545599281787874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,2,128,1,float16,fp8,0,0.016675199568271636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,2,128,1,fp8,fp8,0,0.016967999935150146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,4,128,1,float16,fp8,0,0.01669120043516159
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,4,128,1,fp8,fp8,0,0.016675199568271636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,8,128,1,float16,float16,0,0.018606400489807128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,8,128,1,float16,fp8,0,0.016596800088882445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,32,4,128,1,float16,fp8,0,0.0432096004486084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,8,128,1,fp8,fp8,0,0.01680160015821457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,32,128,1,float16,float16,0,0.01446399986743927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,32,128,1,float16,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,32,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,1,128,1,float16,float16,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,1,128,1,fp8,fp8,0,0.012462399899959564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,2,128,1,float16,float16,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,2,128,1,float16,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,2,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,4,128,1,float16,float16,0,0.012569600343704223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,4,128,1,float16,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,32,2,128,1,fp8,fp8,0,0.024747200310230255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,4,128,1,fp8,fp8,0,0.012539200484752655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,8,128,1,float16,float16,0,0.012600000202655792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,8,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,8,128,1,fp8,fp8,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,32,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,32,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,32,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,1,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,1,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,1,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,2,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,2,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,4,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,32,4,128,1,float16,float16,0,0.018544000387191773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,4,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,4,128,1,fp8,fp8,0,0.01029760017991066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,8,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,8,128,1,float16,fp8,0,0.0103472001850605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,8,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,32,128,1,float16,float16,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,32,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,32,128,1,fp8,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,32,1,128,1,float16,fp8,0,0.012681600451469422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,1,128,1,float16,float16,0,0.010311999917030334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,1,128,1,fp8,fp8,0,0.010304000228643417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,2,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,2,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,2,128,1,fp8,fp8,0,0.010335999727249145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,4,128,1,float16,float16,0,0.010292799770832061
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,4,128,1,float16,fp8,0,0.010328000038862228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,4,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,8,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,8,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,8,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,32,128,1,float16,float16,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,32,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,32,128,1,fp8,fp8,0,0.010022400319576264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,1,128,1,float16,float16,0,0.01029760017991066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,1,128,1,float16,fp8,0,0.008633600175380706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,1,128,1,fp8,fp8,0,0.008804800361394883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,2,128,1,float16,float16,0,0.010291200131177902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,2,128,1,float16,fp8,0,0.008872000128030777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,2,128,1,fp8,fp8,0,0.008932799845933915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,4,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,4,128,1,float16,fp8,0,0.010342399775981902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,4,128,1,fp8,fp8,0,0.010345599800348281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,8,128,1,float16,float16,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,32,1,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,8,128,1,fp8,fp8,0,0.010294400155544281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,1,128,1,float16,float16,0,0.15077439546585084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,1,128,1,float16,fp8,0,0.13950560092926026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,1,128,1,fp8,fp8,0,0.13959200382232667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,0,0.14995360374450684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,0,0.13942879438400269
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,32,2,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,2,128,1,fp8,fp8,0,0.13949120044708252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,0,0.15115360021591187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,0,0.139518404006958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,4,128,1,fp8,fp8,0,0.13949600458145142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,0,0.1512895941734314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,0,0.13948960304260255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,0,0.08005920052528381
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,0,0.07446879744529725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,32,128,1,fp8,fp8,0,0.07390879988670349
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,1,128,1,float16,float16,0,0.08000479936599732
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,1,128,1,float16,fp8,0,0.07392320036888123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,1,128,1,fp8,fp8,0,0.07416160106658935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,0,0.08002399802207946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,0,0.0739296019077301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,2,128,1,fp8,fp8,0,0.07401919960975648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,0,0.08005759716033936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,0,0.07392479777336121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,4,128,1,fp8,fp8,0,0.07400640249252319
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,0,0.08007519841194152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,0,0.07396960258483887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,32,8,128,1,float16,fp8,0,0.010299199819564819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,32,8,128,1,fp8,fp8,0,0.0739184021949768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,0,0.04525440037250519
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,0,0.04140639901161194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,1,128,1,float16,float16,0,0.0438975989818573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,1,128,1,float16,fp8,0,0.041289600729942325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,1,128,1,fp8,fp8,0,0.04118080139160156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,0,0.04519680142402649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,0,0.0411296010017395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,2,128,1,fp8,fp8,0,0.041247999668121337
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,0,0.04374560117721558
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,0,0.041150400042533876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,4,128,1,fp8,fp8,0,0.04117920100688934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,0,0.04520159959793091
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,0,0.041315200924873355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,8,128,1,fp8,fp8,0,0.04127680063247681
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,0,0.026811200380325317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,32,128,1,fp8,fp8,0,0.024799999594688416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,1,128,1,float16,float16,0,0.026715201139450074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,1,128,1,float16,fp8,0,0.024780799448490144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,1,128,1,fp8,fp8,0,0.024772800505161285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,32,8,128,1,fp8,fp8,0,0.13946080207824707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,0,0.026790401339530943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,0,0.02475520074367523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,2,128,1,fp8,fp8,0,0.024780799448490144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,0,0.026859200000762938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,0,0.02476480007171631
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,4,128,1,fp8,fp8,0,0.025019198656082153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,0,0.026836800575256347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,0,0.024956800043582916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,8,128,1,fp8,fp8,0,0.024742400646209715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,0,0.018702399730682374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,0,0.016681599617004394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,32,128,1,fp8,fp8,0,0.017401599884033205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,1,128,1,float16,float16,0,0.018617600202560425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,1,128,1,fp8,fp8,0,0.016627199947834015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,1,128,1,float16,fp8,0,0.016734400391578676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,0,0.016811199486255646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,32,32,128,1,fp8,fp8,0,0.04147680103778839
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,2,128,1,fp8,fp8,0,0.016622400283813475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,0,0.018460799753665925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,0,0.016604800522327424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,4,128,1,fp8,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,0,0.01857919991016388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,0,0.01658719927072525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,8,128,1,fp8,fp8,0,0.016678400337696075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,0,0.01451839953660965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,0,0.02484479993581772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,32,128,1,fp8,fp8,0,0.012569600343704223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,1,128,1,float16,float16,0,0.013382400572299957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,1,128,1,float16,fp8,0,0.012612800300121307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,0,0.01441120058298111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,0,0.012518399953842163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,2,128,1,fp8,fp8,0,0.01266240030527115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,0,0.013532799482345582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,4,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,0,0.01271200031042099
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,8,128,1,fp8,fp8,0,0.012664000689983367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,0,0.01868959963321686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,0,0.01064639985561371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,32,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,1,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,1,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,1,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,2,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,0,0.010590399801731109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,4,128,1,fp8,fp8,0,0.010337600111961364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,0,0.010300800204277039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,8,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,0,0.010564800351858139
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,0,0.010345599800348281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,32,1,128,1,fp8,fp8,0,0.012457600235939026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,32,128,1,fp8,fp8,0,0.010344000160694122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,1,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,1,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,0,0.010308799892663955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,2,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,4,128,1,fp8,fp8,0,0.010334400087594986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,0,0.010294400155544281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,0,0.010318399965763092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,8,128,1,fp8,fp8,0,0.010329599678516387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,0,0.010318399965763092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,32,128,1,fp8,fp8,0,0.009419199824333192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,1,128,1,float16,float16,0,0.010291200131177902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,1,128,1,float16,fp8,0,0.00923679992556572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,1,128,1,fp8,fp8,0,0.00931679978966713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,0,0.010286399722099304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,0,0.009558399766683578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,2,128,1,fp8,fp8,0,0.010315199941396713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,0,0.010283199697732925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,0,0.010294400155544281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,4,128,1,fp8,fp8,0,0.008550400286912918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,0,0.010294400155544281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,0,0.010302399843931198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,32,8,128,1,fp8,fp8,0,0.008470399677753449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,32,1,128,1,fp8,fp8,0,0.009576000273227692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,fp8,0,5.869998550415039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,1,128,1,fp8,fp8,0,5.884072113037109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,float16,0,8.088159942626953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,float16,0,7.68756332397461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,fp8,0,5.920095825195313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,2,128,1,fp8,fp8,0,5.924124908447266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,fp8,0,5.970873641967773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,float16,0,8.136654663085938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,4,128,1,fp8,fp8,0,5.915588760375977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,float16,0,7.835091400146484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,fp8,0,6.015171051025391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,24,8,128,1,fp8,fp8,0,6.095619201660156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,float16,0,4.007483291625976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,24,128,1,fp8,fp8,0,3.1317520141601562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,fp8,0,3.2575393676757813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,fp8,0,3.1183311462402346
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,1,128,1,fp8,fp8,0,2.989686393737793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,fp8,0,2.9688783645629884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,float16,0,3.9727760314941407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,2,128,1,fp8,fp8,0,2.9994415283203124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,float16,0,3.751851272583008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,fp8,0,3.277967834472656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,4,128,1,fp8,fp8,0,3.0135984420776367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,fp8,0,3.01757755279541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,8,128,1,fp8,fp8,0,3.0075807571411133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,float16,0,4.013750457763672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,fp8,0,1.624844741821289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,24,128,1,fp8,fp8,0,1.9983055114746093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,float16,0,1.856443214416504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,fp8,0,1.6530031204223632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,1,128,1,fp8,fp8,0,1.7790000915527344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,float16,0,1.8564239501953126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,fp8,0,1.6619264602661132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,2,128,1,fp8,fp8,0,1.5392496109008789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,float16,0,1.8718992233276368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,fp8,0,1.5277456283569335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,4,128,1,fp8,fp8,0,1.8244991302490234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,float16,0,1.8671503067016602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,fp8,0,1.5478976249694825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,8,128,1,fp8,fp8,0,1.7100879669189453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,fp8,0,0.8671664237976074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,24,128,1,fp8,fp8,0,0.8624480247497559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,float16,0,0.9526320457458496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,fp8,0,0.8123711585998535
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,1,128,1,fp8,fp8,0,0.8057632446289062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,float16,0,0.951360034942627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,fp8,0,0.8084624290466309
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,2,128,1,fp8,fp8,0,0.8134847640991211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,float16,0,0.9639599800109864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,fp8,0,0.8542544364929199
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,4,128,1,fp8,fp8,0,0.864129638671875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,float16,0,0.9807791709899902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,fp8,0,0.8101119995117188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,8,128,1,fp8,fp8,0,0.8270048141479492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,float16,0,4.205212783813477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,fp8,0,3.5881935119628907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,1,128,1,fp8,fp8,0,3.4533615112304688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,float16,0,4.299265670776367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,fp8,0,3.6469120025634765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,2,128,1,fp8,fp8,0,3.5025089263916014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,float16,0,4.334158325195313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,fp8,0,3.5843551635742186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,4,128,1,fp8,fp8,0,3.499262237548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,float16,0,4.2558128356933596
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,fp8,0,3.55286865234375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,24,8,128,1,fp8,fp8,0,3.564558410644531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,float16,0,1.0436063766479493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,float16,0,3.854414367675781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,fp8,0,1.8931743621826171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,float16,0,2.2751440048217773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,float16,0,2.0536272048950197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,24,128,1,fp8,fp8,0,2.023107147216797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,float16,0,2.103001594543457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,1,128,1,fp8,fp8,0,1.8040191650390625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,float16,0,2.068974494934082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,fp8,0,2.0609216690063477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,2,128,1,fp8,fp8,0,1.7967184066772461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,float16,0,2.2340320587158202
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,fp8,0,1.7613712310791017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,fp8,0,1.9653360366821289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,4,128,1,fp8,fp8,0,1.7894943237304688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,float16,0,1.2226767539978027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,float16,0,2.3123872756958006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,fp8,0,1.7665824890136719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,24,8,128,1,fp8,fp8,0,1.7597423553466798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,fp8,0,1.0510751724243164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,24,128,1,fp8,fp8,0,1.1139424324035645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,float16,0,1.0857999801635743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,fp8,0,0.912940788269043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,1,128,1,fp8,fp8,0,0.9177375793457031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,fp8,0,0.9180591583251954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,float16,0,1.0795151710510253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,float16,0,1.1254207611083984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,fp8,0,0.9154800415039063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,4,128,1,fp8,fp8,0,0.9195199966430664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,fp8,0,0.9191935539245606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,8,128,1,fp8,fp8,0,1.006553554534912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,float16,0,1.1545743942260742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,float16,0,0.6382271766662597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,fp8,0,0.5659679889678955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,24,128,1,fp8,fp8,0,0.5702271938323975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,fp8,0,0.4915440082550049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,float16,0,0.5805488109588623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,1,128,1,fp8,fp8,0,0.539031982421875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,24,2,128,1,fp8,fp8,0,0.9142383575439453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,fp8,0,0.5081264019012451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,float16,0,0.5724624156951904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,float16,0,0.5744287967681885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,fp8,0,0.5052800178527832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,4,128,1,fp8,fp8,0,0.49258718490600584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,float16,0,0.5862336158752441
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,fp8,0,0.49532642364501955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,2,128,1,fp8,fp8,0,0.49320321083068847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,fp8,0,2.4892240524291993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,float16,0,2.8872047424316407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,24,8,128,1,fp8,fp8,0,0.49327678680419923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,1,128,1,fp8,fp8,0,2.4870223999023438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,fp8,0,2.663075256347656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,float16,0,2.992263984680176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,2,128,1,fp8,fp8,0,2.476940727233887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,fp8,0,2.483273506164551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,float16,0,3.1685232162475585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,4,128,1,fp8,fp8,0,2.5968496322631838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,float16,0,3.009006309509277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,float16,0,1.682481575012207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,fp8,0,2.5043712615966798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,fp8,0,1.4364959716796875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,24,8,128,1,fp8,fp8,0,2.6840335845947267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,24,128,1,fp8,fp8,0,1.4302144050598145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,float16,0,1.481043243408203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,fp8,0,1.2703696250915528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,1,128,1,fp8,fp8,0,1.284830379486084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,float16,0,1.4999088287353515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,fp8,0,1.2690383911132812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,2,128,1,fp8,fp8,0,1.2827728271484375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,fp8,0,1.2713520050048828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,4,128,1,fp8,fp8,0,1.2778944015502929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,fp8,0,1.2807087898254395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,float16,0,1.5366352081298829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,float16,0,0.8746928215026856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,8,128,1,fp8,fp8,0,1.279203224182129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,float16,0,1.5443023681640624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,24,128,1,fp8,fp8,0,0.7183824062347413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,fp8,0,0.665987205505371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,float16,0,0.7782000064849853
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,1,128,1,fp8,fp8,0,0.6646687984466553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,fp8,0,0.6731599807739258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,float16,0,0.7951151847839355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,2,128,1,fp8,fp8,0,0.6652143955230713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,fp8,0,0.6743775844573975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,float16,0,0.7928287982940674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,4,128,1,fp8,fp8,0,0.6640399932861328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,fp8,0,0.6689727783203125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,float16,0,0.8242879867553711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,fp8,0,0.8036640167236329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,24,8,128,1,fp8,fp8,0,0.6671616077423096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,fp8,0,0.395635199546814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,24,128,1,fp8,fp8,0,0.3899087905883789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,fp8,0,0.36295840740203855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,1,128,1,fp8,fp8,0,0.3621520042419434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,float16,0,0.4297135829925537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,fp8,0,0.3612303972244263
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,2,128,1,fp8,fp8,0,0.3637104034423828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,float16,0,0.4209792137145996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,fp8,0,0.3643199920654297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,4,128,1,fp8,fp8,0,0.3627887964248657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,float16,0,0.4407360076904297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,fp8,0,0.3638672113418579
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,8,128,1,fp8,fp8,0,0.3693167924880981
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,float16,0,0.474560022354126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,float16,0,0.41428160667419434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,fp8,0,3.297415924072266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,float16,0,3.8262977600097656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,1,128,1,fp8,fp8,0,3.2915328979492187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,float16,0,3.9560352325439454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,fp8,0,3.348068618774414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,2,128,1,fp8,fp8,0,3.4537696838378906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,float16,0,3.9878353118896483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,fp8,0,3.5564990997314454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,4,128,1,fp8,fp8,0,3.3235855102539062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,float16,0,4.019252777099609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,fp8,0,3.4743183135986326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,24,8,128,1,fp8,fp8,0,3.3006481170654296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,float16,0,2.276795196533203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,fp8,0,1.9310192108154296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,24,128,1,fp8,fp8,0,1.8245183944702148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,float16,0,1.9149360656738281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,fp8,0,1.8297008514404296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,1,128,1,fp8,fp8,0,1.6664047241210938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,float16,0,1.955828857421875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,fp8,0,1.868886375427246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,2,128,1,fp8,fp8,0,1.6743087768554688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,float16,0,1.9559808731079102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,fp8,0,1.8289583206176758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,4,128,1,fp8,fp8,0,1.6745855331420898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,float16,0,2.0651983261108398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,float16,0,1.1457504272460937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,fp8,0,1.8544544219970702
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,24,8,128,1,fp8,fp8,0,1.6808303833007812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,fp8,0,1.0980607986450195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,24,128,1,fp8,fp8,0,0.9638879776000977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,float16,0,1.015272045135498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,fp8,0,0.8595024108886719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,1,128,1,fp8,fp8,0,0.9053695678710938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,float16,0,0.9911567687988281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,fp8,0,0.965675163269043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,2,128,1,fp8,fp8,0,0.8592975616455079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,float16,0,1.027950382232666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,fp8,0,0.8963343620300293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,4,128,1,fp8,fp8,0,0.9670528411865235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,float16,0,1.0434831619262694
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,fp8,0,0.9548656463623046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,fp8,0,0.5148447990417481
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,24,8,128,1,fp8,fp8,0,0.9318415641784668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,24,128,1,fp8,fp8,0,0.5476079940795898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,float16,0,0.525387191772461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,fp8,0,0.48684158325195315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,1,128,1,fp8,fp8,0,0.4647503852844238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,float16,0,0.5269887924194336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,fp8,0,0.4549903869628906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,2,128,1,fp8,fp8,0,0.4563439846038818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,float16,0,0.5417119979858398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,fp8,0,0.45518879890441893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,fp8,0,0.456390380859375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,float16,0,0.5604735851287842
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,float16,0,0.3399183988571167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,8,128,1,fp8,fp8,0,0.45717759132385255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,fp8,0,0.28292479515075686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,24,128,1,fp8,fp8,0,0.2750175952911377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,float16,0,0.5993311882019043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,float16,0,0.2898000001907349
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,fp8,0,0.25203039646148684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,1,128,1,fp8,fp8,0,0.25556159019470215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,float16,0,0.2846496105194092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,fp8,0,0.25625760555267335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,2,128,1,fp8,fp8,0,0.25333759784698484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,float16,0,0.29215359687805176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,fp8,0,0.25217280387878416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,4,128,1,fp8,fp8,0,0.25305440425872805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,float16,0,0.2956576108932495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,24,4,128,1,fp8,fp8,0,0.4573472023010254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,fp8,0,0.2538399934768677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,24,8,128,1,fp8,fp8,0,0.252455997467041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,fp8,0,1.9841615676879882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,1,128,1,fp8,fp8,0,1.981729507446289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,float16,0,2.3105119705200194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,fp8,0,1.98056640625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,float16,0,2.287406349182129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,2,128,1,fp8,fp8,0,2.0818096160888673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,float16,0,2.2870624542236326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,fp8,0,2.186177635192871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,float16,0,2.4378528594970703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,float16,0,1.4059120178222657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,8,128,1,fp8,fp8,0,2.004315185546875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,fp8,0,2.140006446838379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,fp8,0,1.1538623809814452
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,24,128,1,fp8,fp8,0,1.1289039611816407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,24,4,128,1,fp8,fp8,0,1.9823152542114257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,fp8,0,1.011734390258789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,1,128,1,fp8,fp8,0,1.0154272079467774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,float16,0,1.1371423721313476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,fp8,0,1.0129535675048829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,2,128,1,fp8,fp8,0,1.0117568016052245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,float16,0,1.1703968048095703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,fp8,0,1.0215807914733888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,4,128,1,fp8,fp8,0,1.013259220123291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,float16,0,1.1423359870910645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,float16,0,1.211575984954834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,fp8,0,1.0186703681945801
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,24,8,128,1,fp8,fp8,0,1.0152912139892578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,float16,0,0.7051807880401612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,fp8,0,0.6344287872314454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,24,128,1,fp8,fp8,0,0.5862607955932617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,float16,0,0.5942304134368896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,1,128,1,fp8,fp8,0,0.5376688003540039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,float16,0,0.6087103843688965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,fp8,0,0.5349232196807862
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,2,128,1,fp8,fp8,0,0.5268752098083496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,float16,0,0.6238959789276123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,fp8,0,0.5305840015411377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,4,128,1,fp8,fp8,0,0.5282815933227539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,fp8,0,0.5295648097991943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,float16,0,0.6430895805358887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,8,128,1,fp8,fp8,0,0.530017614364624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,float16,0,0.37981600761413575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,24,128,1,fp8,fp8,0,0.3153712034225464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,float16,0,0.31777119636535645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,fp8,0,0.2836496114730835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,1,128,1,fp8,fp8,0,0.28612799644470216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,float16,0,0.31574559211730957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,fp8,0,0.5258416175842285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,2,128,1,fp8,fp8,0,0.28430240154266356
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,float16,0,0.32069599628448486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,fp8,0,0.28508639335632324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,4,128,1,fp8,fp8,0,0.28447680473327636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,float16,0,0.3342943906784058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,fp8,0,0.28519840240478517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,8,128,1,fp8,fp8,0,0.28533120155334474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,fp8,0,0.1786944031715393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,24,128,1,fp8,fp8,0,0.17853920459747313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,fp8,0,0.34191360473632815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,float16,0,0.17752959728240966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,fp8,0,0.16219680309295653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,1,128,1,fp8,fp8,0,0.16166880130767822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,float16,0,0.18178880214691162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,fp8,0,0.16212639808654786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,2,128,1,fp8,fp8,0,0.1618783950805664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,float16,0,0.17949119806289673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,fp8,0,0.16179360151290895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,fp8,0,0.2882544040679932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,4,128,1,fp8,fp8,0,0.16149120330810546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,float16,0,0.19096959829330445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,fp8,0,0.16132320165634156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,8,128,1,fp8,fp8,0,0.16110880374908448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,float16,0,0.21222879886627197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,fp8,0,1.9654191970825194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,1,128,1,fp8,fp8,0,1.9648927688598632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,float16,0,2.2069168090820312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,float16,0,2.1580991744995117
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,fp8,0,1.9757823944091797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,2,128,1,fp8,fp8,0,1.9663232803344726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,float16,0,2.3381536483764647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,fp8,0,2.069691276550293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,4,128,1,fp8,fp8,0,1.970871925354004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,float16,0,2.3104831695556642
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,fp8,0,2.2280031204223634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,float16,0,1.397390365600586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,24,8,128,1,fp8,fp8,0,1.976848030090332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,fp8,0,1.1954912185668944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,24,128,1,fp8,fp8,0,1.1572784423828124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,float16,0,1.099342441558838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,fp8,0,0.9984784126281738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,1,128,1,fp8,fp8,0,0.9985391616821289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,float16,0,1.1143744468688965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,fp8,0,0.9989871978759766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,fp8,0,1.0005904197692872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,float16,0,1.1439711570739746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,4,128,1,fp8,fp8,0,1.0010000228881837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,float16,0,1.1912768363952637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,fp8,0,1.0055439949035645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,8,128,1,fp8,fp8,0,1.013871955871582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,24,2,128,1,fp8,fp8,0,0.9991312026977539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,float16,0,0.7086400032043457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,fp8,0,0.594371223449707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,24,128,1,fp8,fp8,0,0.5935103893280029
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,float16,0,0.5568463802337646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,fp8,0,0.5162399768829345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,1,128,1,fp8,fp8,0,0.5152671813964844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,float16,0,0.5755648136138916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,fp8,0,0.5159039974212647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,float16,0,0.5746880054473877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,fp8,0,0.5186863899230957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,4,128,1,fp8,fp8,0,0.5159855842590332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,float16,0,0.5998335838317871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,fp8,0,0.5217440128326416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,8,128,1,fp8,fp8,0,0.5255119800567627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,float16,0,0.37381439208984374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,fp8,0,0.3156383991241455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,24,128,1,fp8,fp8,0,0.31410560607910154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,float16,0,0.29537439346313477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,fp8,0,0.27482559680938723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,1,128,1,fp8,fp8,0,0.27278881072998046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,float16,0,0.29633119106292727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,fp8,0,0.2752608060836792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,2,128,1,fp8,fp8,0,0.27325439453125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,float16,0,0.3041343927383423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,fp8,0,0.2750191926956177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,4,128,1,fp8,fp8,0,0.2730655908584595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,fp8,0,0.2768160104751587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,8,128,1,fp8,fp8,0,0.2749408006668091
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,float16,0,0.20610880851745605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,fp8,0,0.17391999959945678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,24,2,128,1,fp8,fp8,0,0.5161935806274414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,float16,0,0.16256799697875976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,fp8,0,0.15180959701538085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,1,128,1,fp8,fp8,0,0.1513808012008667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,float16,0,0.16510239839553834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,fp8,0,0.1517791986465454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,2,128,1,fp8,fp8,0,0.15174399614334105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,float16,0,0.16744320392608641
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,fp8,0,0.15172799825668334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,4,128,1,fp8,fp8,0,0.1518239974975586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,float16,0,0.17504639625549318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,fp8,0,0.1529584050178528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,8,128,1,fp8,fp8,0,0.1529088020324707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,float16,0,0.11738400459289551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,fp8,0,0.09977440237998962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,24,128,1,fp8,fp8,0,0.09947519898414611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,float16,0,0.09721760153770446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,fp8,0,0.09109119772911071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,1,128,1,fp8,fp8,0,0.09100000262260437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,float16,0,0.0964959979057312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,fp8,0,0.0911903977394104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,2,128,1,fp8,fp8,0,0.09068480134010315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,float16,0,0.09842879772186279
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,fp8,0,0.09044960141181946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,4,128,1,fp8,fp8,0,0.09079679846763611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,float16,0,0.09991999864578247
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,float16,0,0.31751840114593505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,8,128,1,fp8,fp8,0,0.09099680185317993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,24,24,128,1,fp8,fp8,0,0.17461600303649902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,float16,0,1.2948176383972168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,fp8,0,1.2300335884094238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,1,128,1,fp8,fp8,0,1.23264799118042
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,fp8,0,1.2309023857116699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,2,128,1,fp8,fp8,0,1.2336959838867188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,fp8,0,0.09030719995498657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,float16,0,1.3463232040405273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,fp8,0,1.2362112045288085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,4,128,1,fp8,fp8,0,1.2337183952331543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,float16,0,1.4228495597839355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,fp8,0,1.2419743537902832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,8,128,1,fp8,fp8,0,1.2396623611450195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,float16,0,1.3174304008483886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,float16,0,0.8994576454162597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,fp8,0,0.7452832221984863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,float16,0,0.6609039783477784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,fp8,0,0.6677152156829834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,1,128,1,fp8,fp8,0,0.6303264141082764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,float16,0,0.666267204284668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,fp8,0,0.6494463920593262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,2,128,1,fp8,fp8,0,0.6306816101074219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,float16,0,0.6885151863098145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,fp8,0,0.6296559810638428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,4,128,1,fp8,fp8,0,0.6322224140167236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,float16,0,0.7265391826629639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,fp8,0,0.6328864097595215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,8,128,1,fp8,fp8,0,0.6353903770446777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,float16,0,0.45140800476074217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,fp8,0,0.3854912042617798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,24,128,1,fp8,fp8,0,0.3873136043548584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,float16,0,0.34620161056518556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,24,24,128,1,fp8,fp8,0,0.7429440021514893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,fp8,0,0.3284703969955444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,1,128,1,fp8,fp8,0,0.3281696081161499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,fp8,0,0.3296544075012207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,float16,0,0.348304009437561
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,2,128,1,fp8,fp8,0,0.3279455900192261
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,float16,0,0.3650847911834717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,fp8,0,0.3278768062591553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,4,128,1,fp8,fp8,0,0.32848320007324217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,fp8,0,0.3304208040237427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,float16,0,0.38378400802612306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,24,8,128,1,fp8,fp8,0,0.33031840324401857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,float16,0,0.24145119190216063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,24,128,1,fp8,fp8,0,0.2072511911392212
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,float16,0,0.18802239894866943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,fp8,0,0.1763983964920044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,1,128,1,fp8,fp8,0,0.17702560424804686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,float16,0,0.18730720281600952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,fp8,0,0.17717920541763305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,2,128,1,fp8,fp8,0,0.1770959973335266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,float16,0,0.1926751971244812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,fp8,0,0.1778607964515686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,4,128,1,fp8,fp8,0,0.17670719623565673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,float16,0,0.20309600830078126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,fp8,0,0.17830560207366944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,8,128,1,fp8,fp8,0,0.17811039686203003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,float16,0,0.1351199984550476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,fp8,0,0.11689120531082153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,24,128,1,fp8,fp8,0,0.11668000221252442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,float16,0,0.10499039888381959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,fp8,0,0.10029120445251465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,1,128,1,fp8,fp8,0,0.10072480440139771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,float16,0,0.10582879781723023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,fp8,0,0.10089919567108155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,2,128,1,fp8,fp8,0,0.10050879716873169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,float16,0,0.107150399684906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,fp8,0,0.10063519477844238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,4,128,1,fp8,fp8,0,0.10060000419616699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,float16,0,0.11288800239562988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,fp8,0,0.10083359479904175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,24,8,128,1,fp8,fp8,0,0.10083359479904175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,fp8,0,0.20811679363250732
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,fp8,0,0.06983839869499206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,24,128,1,fp8,fp8,0,0.0699728012084961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,float16,0,0.06783199906349183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,fp8,0,0.06383519768714904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,1,128,1,fp8,fp8,0,0.06371039748191834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,float16,0,0.0678384006023407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,2,128,1,fp8,fp8,0,0.06365280151367188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,float16,0,0.06835039854049682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,fp8,0,0.06362400054931641
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,4,128,1,fp8,fp8,0,0.06374719738960266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,float16,0,0.06978240013122558
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,fp8,0,0.06376000046730042
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,8,128,1,fp8,fp8,0,0.06366879940032959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,float16,0,1.3322239875793458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,fp8,0,1.2947072029113769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,float16,0,0.0797648012638092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,1,128,1,fp8,fp8,0,1.308238410949707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,float16,0,1.3254032135009766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,fp8,0,0.06389120221138
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,fp8,0,1.2942992210388184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,2,128,1,fp8,fp8,0,1.2991696357727052
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,float16,0,1.394264030456543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,fp8,0,1.2968576431274415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,4,128,1,fp8,fp8,0,1.2973520278930664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,fp8,0,1.3141615867614747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,8,128,1,fp8,fp8,0,1.3038928031921386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,float16,0,0.9272496223449707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,fp8,0,0.8072223663330078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,24,128,1,fp8,fp8,0,0.807316780090332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,float16,0,0.6651631832122803
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,fp8,0,0.6581471920013428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,1,128,1,fp8,fp8,0,0.6578944206237793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,float16,0,0.6706511974334717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,fp8,0,0.6589856147766113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,2,128,1,fp8,fp8,0,0.6580448150634766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,float16,0,0.6962128162384034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,fp8,0,0.6604559898376465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,4,128,1,fp8,fp8,0,0.6597760200500489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,float16,0,1.5010144233703613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,float16,0,0.7491792201995849
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,fp8,0,0.6640927791595459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,fp8,0,0.415388822555542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,24,8,128,1,fp8,fp8,0,0.6634768009185791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,24,128,1,fp8,fp8,0,0.4158656120300293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,float16,0,0.35335359573364256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,fp8,0,0.3397200107574463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,1,128,1,fp8,fp8,0,0.34010241031646726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,float16,0,0.3546511888504028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,2,128,1,fp8,fp8,0,0.33988959789276124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,float16,0,0.3645359992980957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,fp8,0,0.3409199953079224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,4,128,1,fp8,fp8,0,0.3406480073928833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,float16,0,0.38675360679626464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,fp8,0,0.3426575899124146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,8,128,1,fp8,fp8,0,0.34264800548553465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,float16,0,0.24996480941772461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,fp8,0,0.21935360431671141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,24,128,1,fp8,fp8,0,0.21905438899993895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,float16,0,0.1847007989883423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,fp8,0,0.1806175947189331
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,1,128,1,fp8,fp8,0,0.1805456042289734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,float16,0,0.18600319623947142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,fp8,0,0.34014880657196045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,fp8,0,0.18065279722213745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,2,128,1,fp8,fp8,0,0.18078880310058593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,float16,0,0.1921679973602295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,fp8,0,0.18051040172576904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,4,128,1,fp8,fp8,0,0.18129600286483766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,float16,0,0.20549280643463136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,fp8,0,0.1823632001876831
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,24,8,128,1,fp8,fp8,0,0.18197439908981322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,float16,0,0.1373311996459961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,fp8,0,0.12087199687957764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,24,128,1,fp8,fp8,0,0.12097920179367065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,float16,0,0.10292479991912842
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,fp8,0,0.09887359738349914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,1,128,1,fp8,fp8,0,0.09993759989738464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,float16,0,0.10312800407409668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,fp8,0,0.10027040243148803
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,2,128,1,fp8,fp8,0,0.098990398645401
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,float16,0,0.10685759782791138
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,fp8,0,0.10047999620437623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,4,128,1,fp8,fp8,0,0.10029439926147461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,float16,0,0.48043041229248046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,fp8,0,0.10053759813308716
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,8,128,1,fp8,fp8,0,0.1011423945426941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,float16,0,0.07928000092506408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,fp8,0,0.07009440064430236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,24,128,1,fp8,fp8,0,0.06919999718666077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,float16,0,0.0634223997592926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,fp8,0,0.05983359813690185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,1,128,1,fp8,fp8,0,0.060464000701904295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,fp8,0,0.06042879819869995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,2,128,1,fp8,fp8,0,0.06037279963493347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,float16,0,0.0639680027961731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,fp8,0,0.059956800937652585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,4,128,1,fp8,fp8,0,0.060043197870254514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,float16,0,0.0661952018737793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,8,128,1,fp8,fp8,0,0.060782402753829956
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,float16,0,0.0472544014453888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,fp8,0,0.045710399746894836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,24,128,1,fp8,fp8,0,0.04670720100402832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,float16,0,0.04368799924850464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,fp8,0,0.04147999882698059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,1,128,1,fp8,fp8,0,0.04174880087375641
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,float16,0,0.04371199905872345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,fp8,0,0.041331198811531064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,float16,0,0.11416159868240357
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,2,128,1,fp8,fp8,0,0.041231998801231386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,float16,0,0.04481599926948547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,fp8,0,0.041308799386024476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,4,128,1,fp8,fp8,0,0.041228801012039185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,float16,0,0.04540480077266693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,fp8,0,0.0411215990781784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,24,8,128,1,fp8,fp8,0,0.04144159853458405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,float16,0,0.0620576024055481
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,float16,0,0.8379167556762696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,fp8,0,0.8496864318847657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,1,128,1,fp8,fp8,0,0.847374439239502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,fp8,0,0.06046079993247986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,float16,0,0.8450863838195801
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,fp8,0,0.850273609161377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,2,128,1,fp8,fp8,0,0.8481648445129395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,float16,0,0.8779536247253418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,fp8,0,0.8504688262939453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,4,128,1,fp8,fp8,0,0.8517392158508301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,float16,0,0.952182388305664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,fp8,0,0.8548303604125976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,float16,0,0.6213535785675048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,24,8,128,1,fp8,fp8,0,0.8546751976013184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,fp8,0,0.5440879821777344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,float16,0,0.4340400218963623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,fp8,0,0.4334479808807373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,1,128,1,fp8,fp8,0,0.43372321128845215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,float16,0,0.43741598129272463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,fp8,0,0.43367681503295896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,2,128,1,fp8,fp8,0,0.43375201225280763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,float16,0,0.45415520668029785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,fp8,0,0.4349184036254883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,4,128,1,fp8,fp8,0,0.4344672203063965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,float16,0,0.49131197929382325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,fp8,0,0.43789920806884763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,float16,0,0.32106719017028806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,fp8,0,0.2824143886566162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,float16,0,0.2254512071609497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,24,128,1,fp8,fp8,0,0.28292479515075686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,fp8,0,0.2257904052734375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,1,128,1,fp8,fp8,0,0.22703518867492675
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,float16,0,0.22652640342712402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,fp8,0,0.2273711919784546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,2,128,1,fp8,fp8,0,0.22624480724334717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,24,128,1,fp8,fp8,0,0.5439536094665527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,fp8,0,0.2269968032836914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,4,128,1,fp8,fp8,0,0.22596158981323242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,float16,0,0.25486719608306885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,fp8,0,0.22799038887023926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,8,128,1,fp8,fp8,0,0.22834401130676268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,float16,0,0.17011359930038453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,fp8,0,0.15123039484024048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,24,128,1,fp8,fp8,0,0.15035680532455445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,float16,0,0.12314560413360595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,24,8,128,1,fp8,fp8,0,0.4367648124694824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,fp8,0,0.1213647961616516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,1,128,1,fp8,fp8,0,0.12098879814147949
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,float16,0,0.1232416033744812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,fp8,0,0.1217344045639038
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,2,128,1,fp8,fp8,0,0.12156480550765991
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,float16,0,0.12756479978561402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,fp8,0,0.12283999919891357
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,4,128,1,fp8,fp8,0,0.12285439968109131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,float16,0,0.13808000087738037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,fp8,0,0.12329119443893433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,24,8,128,1,fp8,fp8,0,0.12321759462356567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,float16,0,0.2380608081817627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,float16,0,0.0945904016494751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,fp8,0,0.08456479907035827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,24,128,1,fp8,fp8,0,0.08587520122528076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,fp8,0,0.06941440105438232
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,1,128,1,fp8,fp8,0,0.06986399888992309
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,float16,0,0.07014240026473999
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,fp8,0,0.06990399956703186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,float16,0,0.07347840070724487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,fp8,0,0.06992800235748291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,4,128,1,fp8,fp8,0,0.06998080015182495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,float16,0,0.07867519855499268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,fp8,0,0.06988800168037415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,8,128,1,fp8,fp8,0,0.06988800168037415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,float16,0,0.0553600013256073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,fp8,0,0.04944320023059845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,24,128,1,fp8,fp8,0,0.04943839907646179
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,float16,0,0.04524320065975189
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,fp8,0,0.0433023989200592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,1,128,1,fp8,fp8,0,0.043249601125717164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,float16,0,0.045300799608230594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,fp8,0,0.043289598822593686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,2,128,1,fp8,fp8,0,0.043303999304771426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,fp8,0,0.04328320026397705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,4,128,1,fp8,fp8,0,0.043244799971580504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,float16,0,0.06985599994659424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,float16,0,0.04730400145053863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,fp8,0,0.04316479861736298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,8,128,1,fp8,fp8,0,0.04321280121803284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,float16,0,0.03092319965362549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,24,128,1,fp8,fp8,0,0.03049759864807129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,float16,0,0.026862400770187377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,fp8,0,0.026795199513435362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,1,128,1,fp8,fp8,0,0.026743999123573302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,float16,0,0.027051201462745665
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,fp8,0,0.026902401447296144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,2,128,1,fp8,fp8,0,0.02680160105228424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,float16,0,0.028856000304222106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,fp8,0,0.026833599805831908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,4,128,1,fp8,fp8,0,0.026796799898147584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,float16,0,0.02889440059661865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,fp8,0,0.026833599805831908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,8,128,1,fp8,fp8,0,0.02680160105228424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,float16,0,0.045372799038887024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,float16,0,0.9116687774658203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,24,2,128,1,fp8,fp8,0,0.06972320079803467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,fp8,0,0.9529680252075196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,1,128,1,fp8,fp8,0,0.9537808418273925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,float16,0,0.9214015960693359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,fp8,0,0.9528304100036621
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,2,128,1,fp8,fp8,0,0.9534576416015625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,float16,0,0.9706576347351075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,fp8,0,0.9556063652038574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,4,128,1,fp8,fp8,0,0.9553008079528809
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,float16,0,1.0589471817016602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,fp8,0,0.030801600217819212
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,fp8,0,0.9605327606201172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,24,8,128,1,fp8,fp8,0,0.9606719970703125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,float16,0,0.705840015411377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,fp8,0,0.6304912090301513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,float16,0,0.4661407947540283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,24,128,1,fp8,fp8,0,0.6289519786834716
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,fp8,0,0.485591983795166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,1,128,1,fp8,fp8,0,0.4859856128692627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,float16,0,0.4698239803314209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,fp8,0,0.4850815773010254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,2,128,1,fp8,fp8,0,0.48579039573669436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,float16,0,0.4960224151611328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,fp8,0,0.4866943836212158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,4,128,1,fp8,fp8,0,0.48586559295654297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,float16,0,0.5391551971435546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,fp8,0,0.49040799140930175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,24,8,128,1,fp8,fp8,0,0.48897600173950195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,float16,0,0.3615216016769409
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,fp8,0,0.32383360862731936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,float16,0,0.24240798950195314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,24,128,1,fp8,fp8,0,0.32406399250030515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,fp8,0,0.2508464097976685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,1,128,1,fp8,fp8,0,0.2505951881408691
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,float16,0,0.2438512086868286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,fp8,0,0.25170719623565674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,2,128,1,fp8,fp8,0,0.2508368015289307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,float16,0,0.25622560977935793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,fp8,0,0.2517839908599854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,4,128,1,fp8,fp8,0,0.2520607948303223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,float16,0,0.2775712013244629
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,fp8,0,0.25296480655670167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,float16,0,0.1897536039352417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,fp8,0,0.17013280391693114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,24,128,1,fp8,fp8,0,0.17031840085983277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,float16,0,0.12986719608306885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,fp8,0,0.13316800594329833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,1,128,1,fp8,fp8,0,0.1331104040145874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,float16,0,0.1310256004333496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,fp8,0,0.13364479541778565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,2,128,1,fp8,fp8,0,0.13341280221939086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,fp8,0,0.13345919847488402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,4,128,1,fp8,fp8,0,0.13344000577926635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,float16,0,0.14818400144577026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,fp8,0,0.13490560054779052
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,8,128,1,fp8,fp8,0,0.1352992057800293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,float16,0,0.10440160036087036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,fp8,0,0.09382399916648865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,24,128,1,fp8,fp8,0,0.09357119798660278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,float16,0,0.07391200065612794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,fp8,0,0.07390400171279907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,24,8,128,1,fp8,fp8,0,0.25371999740600587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,1,128,1,fp8,fp8,0,0.07358880043029785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,fp8,0,0.07307999730110168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,2,128,1,fp8,fp8,0,0.07347519993782044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,float16,0,0.07728639841079712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,fp8,0,0.07405120134353638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,4,128,1,fp8,fp8,0,0.07396960258483887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,float16,0,0.08317599892616272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,float16,0,0.13677120208740234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,fp8,0,0.07455199956893921
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,8,128,1,fp8,fp8,0,0.07433760166168213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,float16,0,0.06021919846534729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,fp8,0,0.05343520045280457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,24,128,1,fp8,fp8,0,0.05347999930381775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,float16,0,0.04521600008010864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,fp8,0,0.04447680115699768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,1,128,1,fp8,fp8,0,0.04452959895133972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,float16,0,0.04534879922866821
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,fp8,0,0.044758400321006774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,2,128,1,fp8,fp8,0,0.04438399970531463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,float16,0,0.04564160108566284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,fp8,0,0.044495999813079834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,float16,0,0.047598400712013246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,fp8,0,0.04486719965934753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,4,128,1,fp8,fp8,0,0.04427359998226166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,24,8,128,1,fp8,fp8,0,0.04413279891014099
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,float16,0,0.035097599029541016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,fp8,0,0.034974399209022525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,24,128,1,fp8,fp8,0,0.03503040075302124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,float16,0,0.032708799839019774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,fp8,0,0.03094559907913208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,1,128,1,fp8,fp8,0,0.03084160089492798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,float16,0,0.03222399950027466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,fp8,0,0.031004801392555237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,2,128,1,fp8,fp8,0,0.03091840147972107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,float16,0,0.03296799957752228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,4,128,1,fp8,fp8,0,0.030931198596954347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,fp8,0,0.031040000915527343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,float16,0,0.03324959874153137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,fp8,0,0.030958399176597595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,24,8,128,1,fp8,fp8,0,0.030926400423049928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,fp8,0,0.022937600314617158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,24,128,1,fp8,fp8,0,0.022694399952888487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,float16,0,0.022711999714374542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,fp8,0,0.02096959948539734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,float16,0,0.07449600100517273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,1,128,1,fp8,fp8,0,0.020766399800777435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,float16,0,0.022683200240135194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,fp8,0,0.021427200734615327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,float16,0,0.022728000581264497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,fp8,0,0.020747199654579163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,4,128,1,fp8,fp8,0,0.020734399557113647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,float16,0,0.022793599963188173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,fp8,0,0.020814399421215057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,8,128,1,fp8,fp8,0,0.021140800416469575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,1,128,1,float16,float16,0,0.7211167812347412
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,1,128,1,float16,fp8,0,0.7793504238128662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,2,128,1,float16,float16,0,0.7272575855255127
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,float16,0,0.02478879988193512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,2,128,1,float16,fp8,0,0.7793471813201904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,24,2,128,1,fp8,fp8,0,0.020667199790477753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,2,128,1,fp8,fp8,0,0.7778719902038574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,4,128,1,float16,float16,0,0.7687535762786866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,4,128,1,float16,fp8,0,0.7787231922149658
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,4,128,1,fp8,fp8,0,0.7785280227661133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,8,128,1,float16,float16,0,0.8482895851135254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,8,128,1,float16,fp8,0,0.7805088043212891
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,1,128,1,fp8,fp8,0,0.7789936065673828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,24,8,128,1,fp8,fp8,0,0.7807024002075196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,24,128,1,float16,fp8,0,0.5337024211883545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,1,128,1,float16,float16,0,0.36795840263366697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,24,128,1,fp8,fp8,0,0.5345424175262451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,1,128,1,float16,fp8,0,0.3956943988800049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,1,128,1,fp8,fp8,0,0.39582080841064454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,2,128,1,float16,float16,0,0.3708832025527954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,2,128,1,float16,fp8,0,0.39568800926208497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,2,128,1,fp8,fp8,0,0.3955967903137207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,4,128,1,float16,float16,0,0.3905855894088745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,4,128,1,float16,fp8,0,0.3959455966949463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,4,128,1,fp8,fp8,0,0.39641120433807375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,8,128,1,float16,float16,0,0.4299488067626953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,8,128,1,float16,fp8,0,0.39766879081726075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,8,128,1,fp8,fp8,0,0.3975712060928345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,24,128,1,float16,float16,0,0.3039360046386719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,24,128,1,float16,fp8,0,0.27424960136413573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,24,24,128,1,float16,float16,0,0.5940688133239747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,24,128,1,fp8,fp8,0,0.2747584104537964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,1,128,1,float16,float16,0,0.1926767945289612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,1,128,1,fp8,fp8,0,0.2050544023513794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,2,128,1,float16,float16,0,0.19311519861221313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,2,128,1,float16,fp8,0,0.20504000186920165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,2,128,1,fp8,fp8,0,0.2050800085067749
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,4,128,1,float16,float16,0,0.20285439491271973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,4,128,1,float16,fp8,0,0.20527200698852538
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,4,128,1,fp8,fp8,0,0.2051408052444458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,8,128,1,float16,float16,0,0.2231600046157837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,8,128,1,float16,fp8,0,0.20558080673217774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,8,128,1,fp8,fp8,0,0.20638399124145507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,24,128,1,float16,float16,0,0.15979360342025756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,24,128,1,float16,fp8,0,0.14511200189590454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,1,128,1,float16,float16,0,0.10373760461807251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,24,128,1,fp8,fp8,0,0.14471839666366576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,1,128,1,float16,fp8,0,0.10871679782867431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,1,128,1,fp8,fp8,0,0.1089967966079712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,2,128,1,float16,float16,0,0.10443680286407471
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,2,128,1,float16,fp8,0,0.1089359998703003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,2,128,1,fp8,fp8,0,0.1089087963104248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,4,128,1,float16,float16,0,0.10906080007553101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,4,128,1,float16,fp8,0,0.10940639972686768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,4,128,1,fp8,fp8,0,0.10920480489730836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,8,128,1,float16,float16,0,0.1192255973815918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,8,128,1,float16,fp8,0,0.11034400463104248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,24,8,128,1,fp8,fp8,0,0.110043203830719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,24,128,1,float16,float16,0,0.08714399933815002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,24,128,1,float16,fp8,0,0.0786080002784729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,24,1,128,1,float16,fp8,0,0.20574719905853273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,1,128,1,float16,float16,0,0.058267199993133546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,1,128,1,float16,fp8,0,0.059729599952697755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,1,128,1,fp8,fp8,0,0.059569597244262695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,2,128,1,float16,float16,0,0.0596127986907959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,2,128,1,float16,fp8,0,0.059614402055740354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,2,128,1,fp8,fp8,0,0.05987200140953064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,4,128,1,float16,float16,0,0.06184160113334656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,4,128,1,float16,fp8,0,0.059617602825164796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,8,128,1,float16,float16,0,0.06785439848899841
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,8,128,1,float16,fp8,0,0.0599120020866394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,8,128,1,fp8,fp8,0,0.061363202333450315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,24,128,1,float16,float16,0,0.0495743989944458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,24,128,1,float16,fp8,0,0.045296001434326175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,1,128,1,float16,float16,0,0.03705120086669922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,1,128,1,float16,fp8,0,0.03701440095901489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,1,128,1,fp8,fp8,0,0.037038400769233704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,2,128,1,float16,float16,0,0.037084800004959104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,2,128,1,float16,fp8,0,0.03701600134372711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,2,128,1,fp8,fp8,0,0.03704639971256256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,4,128,1,float16,float16,0,0.037118399143218996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,4,128,1,float16,fp8,0,0.03704479932785034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,4,128,1,fp8,fp8,0,0.03704800009727478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,8,128,1,float16,float16,0,0.03930079936981201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,8,128,1,float16,fp8,0,0.037041598558425905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,8,128,1,fp8,fp8,0,0.03705439865589142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,24,128,1,float16,float16,0,0.029798400402069092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,24,128,1,float16,fp8,0,0.03091680109500885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,24,128,1,fp8,fp8,0,0.030982398986816408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,24,128,1,fp8,fp8,0,0.07944480180740357
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,1,128,1,float16,fp8,0,0.025271999835968017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,1,128,1,fp8,fp8,0,0.02492319941520691
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,2,128,1,float16,float16,0,0.025430399179458617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,2,128,1,float16,fp8,0,0.025091201066970825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,2,128,1,fp8,fp8,0,0.024758400022983552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,24,4,128,1,fp8,fp8,0,0.059931200742721555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,4,128,1,float16,float16,0,0.026236799359321595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,4,128,1,float16,fp8,0,0.02484000027179718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,4,128,1,fp8,fp8,0,0.024886399507522583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,8,128,1,float16,float16,0,0.026984000205993654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,24,24,128,1,fp8,fp8,0,0.045158401131629944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,8,128,1,float16,fp8,0,0.02481440007686615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,8,128,1,fp8,fp8,0,0.024843199551105498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,24,128,1,fp8,fp8,0,0.020654399693012238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,1,128,1,float16,float16,0,0.018529599905014037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,1,128,1,float16,fp8,0,0.018569600582122803
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,1,128,1,fp8,fp8,0,0.018571199476718904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,2,128,1,float16,float16,0,0.018435199558734894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,2,128,1,float16,fp8,0,0.01844480037689209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,2,128,1,fp8,fp8,0,0.018467199802398682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,4,128,1,float16,float16,0,0.018595199286937713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,4,128,1,float16,fp8,0,0.018561600148677825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,4,128,1,fp8,fp8,0,0.018620799481868743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,8,128,1,float16,float16,0,0.01867839992046356
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,8,128,1,float16,fp8,0,0.01854719966650009
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,8,128,1,fp8,fp8,0,0.018566399812698364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,24,128,1,float16,float16,0,0.01857919991016388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,24,128,1,float16,fp8,0,0.018606400489807128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,24,1,128,1,float16,float16,0,0.02675040066242218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,24,128,1,fp8,fp8,0,0.01860159933567047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,1,128,1,float16,float16,0,0.016740800440311433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,1,128,1,float16,fp8,0,0.016657599806785585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,1,128,1,fp8,fp8,0,0.016774399578571318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,2,128,1,float16,float16,0,0.01669120043516159
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,2,128,1,float16,fp8,0,0.01672479957342148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,2,128,1,fp8,fp8,0,0.01669919937849045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,4,128,1,float16,float16,0,0.01669600009918213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,4,128,1,float16,fp8,0,0.01722240000963211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,4,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,8,128,1,float16,float16,0,0.016763199865818024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,8,128,1,float16,fp8,0,0.016680000722408293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,24,128,1,float16,fp8,0,0.018619200587272643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,24,8,128,1,fp8,fp8,0,0.016702400147914888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,1,128,1,float16,float16,0,0.3131920099258423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,1,128,1,float16,fp8,0,0.34576001167297366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,1,128,1,fp8,fp8,0,0.34495038986206056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,2,128,1,float16,float16,0,0.31538240909576415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,2,128,1,float16,fp8,0,0.34446239471435547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,2,128,1,fp8,fp8,0,0.3455008029937744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,4,128,1,float16,float16,0,0.333188796043396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,4,128,1,float16,fp8,0,0.3454576015472412
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,4,128,1,fp8,fp8,0,0.3449264049530029
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,8,128,1,float16,float16,0,0.3719935894012451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,8,128,1,float16,fp8,0,0.34563679695129396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,24,8,128,1,fp8,fp8,0,0.34596478939056396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,24,128,1,float16,float16,0,0.2706959962844849
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,24,24,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,24,128,1,float16,fp8,0,0.24645919799804689
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,24,128,1,fp8,fp8,0,0.24596478939056396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,1,128,1,float16,fp8,0,0.1794864058494568
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,1,128,1,fp8,fp8,0,0.1792240023612976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,2,128,1,float16,float16,0,0.1654255986213684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,2,128,1,float16,fp8,0,0.17884000539779663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,2,128,1,fp8,fp8,0,0.17989120483398438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,4,128,1,float16,float16,0,0.1739743947982788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,4,128,1,float16,fp8,0,0.18035999536514283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,8,128,1,float16,float16,0,0.19313280582427977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,8,128,1,float16,fp8,0,0.18041280508041382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,4,128,1,fp8,fp8,0,0.17947039604187012
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,8,128,1,fp8,fp8,0,0.1800096035003662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,24,128,1,float16,fp8,0,0.13088159561157225
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,24,128,1,fp8,fp8,0,0.13110400438308717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,1,128,1,float16,fp8,0,0.09653599858283997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,1,128,1,fp8,fp8,0,0.09658240079879761
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,2,128,1,float16,float16,0,0.09051359891891479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,2,128,1,float16,fp8,0,0.09673920273780823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,2,128,1,fp8,fp8,0,0.0968400001525879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,4,128,1,float16,float16,0,0.09461920261383057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,4,128,1,float16,fp8,0,0.09669439792633057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,4,128,1,fp8,fp8,0,0.09669280052185059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,24,1,128,1,float16,float16,0,0.16416000127792357
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,8,128,1,float16,float16,0,0.10445280075073242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,8,128,1,float16,fp8,0,0.09666560292243957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,8,128,1,fp8,fp8,0,0.09657440185546876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,24,128,1,float16,float16,0,0.08042399883270264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,24,128,1,float16,fp8,0,0.07257760167121888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,24,128,1,fp8,fp8,0,0.07300959825515747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,1,128,1,float16,float16,0,0.0523855984210968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,1,128,1,float16,fp8,0,0.05344480276107788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,1,128,1,fp8,fp8,0,0.05340800285339355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,2,128,1,float16,float16,0,0.052395200729370116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,2,128,1,float16,fp8,0,0.053523200750350955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,2,128,1,fp8,fp8,0,0.05345119833946228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,4,128,1,float16,float16,0,0.05548480153083801
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,4,128,1,float16,fp8,0,0.05360640287399292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,4,128,1,fp8,fp8,0,0.05353599786758423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,8,128,1,float16,float16,0,0.06059520244598389
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,24,128,1,float16,float16,0,0.14351199865341185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,8,128,1,float16,fp8,0,0.055358397960662845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,24,128,1,float16,float16,0,0.04520959854125976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,24,128,1,float16,fp8,0,0.04111039936542511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,24,1,128,1,float16,float16,0,0.0903823971748352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,24,128,1,fp8,fp8,0,0.04116159975528717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,1,128,1,float16,float16,0,0.03102239966392517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,1,128,1,float16,fp8,0,0.031027200818061828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,1,128,1,fp8,fp8,0,0.0315200001001358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,2,128,1,float16,float16,0,0.031011199951171874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,2,128,1,float16,fp8,0,0.031086400151252747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,2,128,1,fp8,fp8,0,0.03178719878196716
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,4,128,1,float16,fp8,0,0.03120799958705902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,4,128,1,fp8,fp8,0,0.031188800930976868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,8,128,1,float16,float16,0,0.03339200019836426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,8,128,1,float16,fp8,0,0.031011199951171874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,8,128,1,fp8,fp8,0,0.032795199751853944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,24,128,1,float16,float16,0,0.026924800872802735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,24,128,1,float16,fp8,0,0.026939201354980468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,24,128,1,fp8,fp8,0,0.02694239914417267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,1,128,1,float16,float16,0,0.022873599827289582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,1,128,1,float16,fp8,0,0.022836799919605254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,1,128,1,fp8,fp8,0,0.02282720059156418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,2,128,1,float16,float16,0,0.022753599286079406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,2,128,1,fp8,fp8,0,0.022779199481010436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,2,128,1,float16,fp8,0,0.02274399995803833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,4,128,1,float16,float16,0,0.02282080054283142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,4,128,1,float16,fp8,0,0.022776000201702118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,4,128,1,fp8,fp8,0,0.022755199670791627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,8,128,1,float16,float16,0,0.024804799258708952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,8,128,1,float16,fp8,0,0.02282239943742752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,24,8,128,1,fp8,fp8,0,0.022844800353050233
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,24,128,1,float16,float16,0,0.018670399487018586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,24,128,1,float16,fp8,0,0.018518400192260743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,24,128,1,fp8,fp8,0,0.01854880005121231
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,1,128,1,float16,float16,0,0.015982399880886077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,1,128,1,float16,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,1,128,1,fp8,fp8,0,0.016542400419712066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,2,128,1,float16,float16,0,0.016487999260425566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,2,128,1,float16,fp8,0,0.01650879979133606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,2,128,1,fp8,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,4,128,1,float16,float16,0,0.016545599699020384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,4,128,1,float16,fp8,0,0.016502399742603303
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,24,8,128,1,fp8,fp8,0,0.054876798391342164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,4,128,1,fp8,fp8,0,0.0165120005607605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,8,128,1,float16,float16,0,0.016648000478744505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,8,128,1,float16,fp8,0,0.0165120005607605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,24,8,128,1,fp8,fp8,0,0.015083199739456177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,24,128,1,float16,float16,0,0.016521599888801575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,24,128,1,float16,fp8,0,0.016457599401474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,24,128,1,fp8,fp8,0,0.016548800468444824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,1,128,1,float16,fp8,0,0.014646400511264802
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,1,128,1,fp8,fp8,0,0.014604799449443817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,2,128,1,float16,float16,0,0.014665600657463074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,2,128,1,float16,fp8,0,0.014655999839305878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,2,128,1,fp8,fp8,0,0.014753599464893342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,4,128,1,float16,float16,0,0.014614400267601014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,4,128,1,float16,fp8,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,4,128,1,fp8,fp8,0,0.014646400511264802
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,8,128,1,float16,float16,0,0.014638400077819825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,8,128,1,float16,fp8,0,0.01464959979057312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,8,128,1,fp8,fp8,0,0.014612799882888794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,24,128,1,float16,float16,0,0.015228800475597382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,24,128,1,float16,fp8,0,0.014569599926471711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,24,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,1,128,1,float16,float16,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,1,128,1,float16,fp8,0,0.014523200690746307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,1,128,1,fp8,fp8,0,0.014467200636863709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,2,128,1,float16,float16,0,0.014575999975204468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,2,128,1,float16,fp8,0,0.014467200636863709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,2,128,1,fp8,fp8,0,0.014483200013637542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,4,128,1,float16,float16,0,0.014552000164985656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,4,128,1,float16,fp8,0,0.01446399986743927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,4,128,1,fp8,fp8,0,0.014422400295734406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,24,1,128,1,float16,float16,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,8,128,1,float16,float16,0,0.014707200229167938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,8,128,1,float16,fp8,0,0.01451680064201355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,24,8,128,1,fp8,fp8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,1,128,1,float16,float16,0,0.18859519958496093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,1,128,1,float16,fp8,0,0.20502879619598388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,1,128,1,fp8,fp8,0,0.20505120754241943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,2,128,1,float16,float16,0,0.18867360353469848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,2,128,1,float16,fp8,0,0.20511040687561036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,2,128,1,fp8,fp8,0,0.2050544023513794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,4,128,1,float16,float16,0,0.1978783965110779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,4,128,1,float16,fp8,0,0.20503358840942382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,4,128,1,fp8,fp8,0,0.2050447940826416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,8,128,1,float16,float16,0,0.21612160205841063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,8,128,1,float16,fp8,0,0.20505599975585936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,24,8,128,1,fp8,fp8,0,0.20509281158447265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,24,128,1,float16,float16,0,0.15087519884109496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,24,128,1,float16,fp8,0,0.14158560037612916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,24,128,1,fp8,fp8,0,0.141702401638031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,1,128,1,float16,fp8,0,0.10871200561523438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,1,128,1,fp8,fp8,0,0.10868480205535888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,2,128,1,float16,float16,0,0.10068000555038452
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,2,128,1,float16,fp8,0,0.10868639945983886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,2,128,1,fp8,fp8,0,0.10879360437393189
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,4,128,1,float16,float16,0,0.10471839904785156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,4,128,1,float16,fp8,0,0.10865919589996338
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,4,128,1,fp8,fp8,0,0.1086624026298523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,8,128,1,float16,float16,0,0.1146496057510376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,8,128,1,float16,fp8,0,0.10867359638214111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,8,128,1,fp8,fp8,0,0.10863679647445679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,24,4,128,1,float16,float16,0,0.03171679973602295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,24,128,1,float16,float16,0,0.08084480166435241
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,24,128,1,float16,fp8,0,0.07738720178604126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,1,128,1,float16,float16,0,0.057118397951126096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,1,128,1,float16,fp8,0,0.0594864010810852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,1,128,1,fp8,fp8,0,0.059614402055740354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,2,128,1,float16,float16,0,0.057067197561264035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,2,128,1,float16,fp8,0,0.05961599946022034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,4,128,1,float16,float16,0,0.0601311981678009
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,4,128,1,float16,fp8,0,0.05955039858818054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,4,128,1,fp8,fp8,0,0.05956159830093384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,8,128,1,float16,float16,0,0.06374239921569824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,8,128,1,float16,fp8,0,0.059627199172973634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,8,128,1,fp8,fp8,0,0.05960000157356262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,24,128,1,float16,float16,0,0.04598079919815064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,24,128,1,float16,fp8,0,0.043249601125717164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,24,1,128,1,float16,float16,0,0.10085760354995728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,24,128,1,fp8,fp8,0,0.04319039881229401
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,1,128,1,float16,float16,0,0.0336896002292633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,1,128,1,float16,fp8,0,0.034999999403953555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,1,128,1,fp8,fp8,0,0.035011199116706845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,2,128,1,float16,float16,0,0.033048000931739804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,2,128,1,float16,fp8,0,0.03494080007076263
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,2,128,1,fp8,fp8,0,0.03505600094795227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,4,128,1,float16,float16,0,0.0348800003528595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,4,128,1,float16,fp8,0,0.03497599959373474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,4,128,1,fp8,fp8,0,0.035043200850486754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,8,128,1,float16,float16,0,0.03714880049228668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,8,128,1,float16,fp8,0,0.0350271999835968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,24,8,128,1,fp8,fp8,0,0.0350383996963501
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,24,128,1,float16,float16,0,0.024883200228214265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,24,128,1,float16,fp8,0,0.02629440128803253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,24,128,1,fp8,fp8,0,0.02608479857444763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,1,128,1,float16,float16,0,0.020739200711250304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,1,128,1,float16,fp8,0,0.021480000019073485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,1,128,1,fp8,fp8,0,0.022724799811840057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,2,128,1,fp8,fp8,0,0.059552001953125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,2,128,1,float16,float16,0,0.020827199518680572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,2,128,1,float16,fp8,0,0.022524799406528472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,4,128,1,float16,float16,0,0.020686399936676026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,4,128,1,float16,fp8,0,0.02072640061378479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,4,128,1,fp8,fp8,0,0.02074880003929138
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,8,128,1,float16,float16,0,0.022785599529743194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,8,128,1,float16,fp8,0,0.020759999752044678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,8,128,1,fp8,fp8,0,0.021851199865341186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,24,128,1,float16,float16,0,0.018649600446224213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,24,128,1,float16,fp8,0,0.01870400011539459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,24,128,1,fp8,fp8,0,0.018622399866580965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,1,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,1,128,1,float16,fp8,0,0.01656160056591034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,1,128,1,fp8,fp8,0,0.016547200083732606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,2,128,1,float16,float16,0,0.016524800658226015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,2,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,2,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,4,128,1,float16,float16,0,0.016630400717258454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,4,128,1,float16,fp8,0,0.01652960032224655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,4,128,1,fp8,fp8,0,0.01653759926557541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,8,128,1,float16,float16,0,0.016657599806785585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,8,128,1,float16,fp8,0,0.01656000018119812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,24,8,128,1,fp8,fp8,0,0.016579200327396394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,24,128,1,float16,float16,0,0.012492799758911132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,24,128,1,float16,fp8,0,0.012462399899959564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,24,128,1,fp8,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,1,128,1,float16,float16,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,24,2,128,1,fp8,fp8,0,0.02274399995803833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,1,128,1,float16,fp8,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,1,128,1,fp8,fp8,0,0.012567999958992004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,2,128,1,float16,float16,0,0.01249760016798973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,2,128,1,float16,fp8,0,0.012409599870443344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,2,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,4,128,1,float16,float16,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,4,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,8,128,1,float16,float16,0,0.012566399574279786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,8,128,1,float16,fp8,0,0.012414400279521943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,8,128,1,fp8,fp8,0,0.012401600182056428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,24,128,1,float16,fp8,0,0.01236959993839264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,24,128,1,float16,float16,0,0.012699200212955475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,24,128,1,fp8,fp8,0,0.012375999987125397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,1,128,1,float16,float16,0,0.012491200119256973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,1,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,1,128,1,fp8,fp8,0,0.010707200318574906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,2,128,1,float16,float16,0,0.011924800276756287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,2,128,1,float16,fp8,0,0.010883200168609618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,4,128,1,float16,float16,0,0.012436799705028534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,2,128,1,fp8,fp8,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,4,128,1,fp8,fp8,0,0.010686399787664414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,4,128,1,float16,fp8,0,0.01144160032272339
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,8,128,1,float16,float16,0,0.012307199835777282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,8,128,1,float16,fp8,0,0.011097600311040878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,24,8,128,1,fp8,fp8,0,0.012358400225639343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,24,128,1,float16,float16,0,0.012465599924325943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,24,128,1,float16,fp8,0,0.012398400157690049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,24,128,1,fp8,fp8,0,0.011500799655914306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,1,128,1,float16,float16,0,0.012411200255155564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,1,128,1,float16,fp8,0,0.011380799859762192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,1,128,1,fp8,fp8,0,0.011155200004577637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,2,128,1,float16,float16,0,0.012465599924325943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,2,128,1,float16,fp8,0,0.011604800075292587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,4,128,1,float16,float16,0,0.01241919994354248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,2,128,1,fp8,fp8,0,0.01106560006737709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,24,24,128,1,fp8,fp8,0,0.0764303982257843
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,4,128,1,float16,fp8,0,0.011416000127792359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,4,128,1,fp8,fp8,0,0.01156800016760826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,8,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,8,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,24,8,128,1,fp8,fp8,0,0.012051200121641159
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,1,128,1,float16,float16,0,0.1452847957611084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,1,128,1,fp8,fp8,0,0.15027999877929688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,2,128,1,float16,float16,0,0.1442512035369873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,2,128,1,float16,fp8,0,0.15045759677886963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,2,128,1,fp8,fp8,0,0.15128159523010254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,4,128,1,float16,float16,0,0.14974240064620972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,24,4,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,4,128,1,float16,fp8,0,0.15083680152893067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,4,128,1,fp8,fp8,0,0.1503119945526123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,8,128,1,float16,float16,0,0.15795680284500122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,8,128,1,float16,fp8,0,0.1513167977333069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,8,128,1,fp8,fp8,0,0.14988479614257813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,24,128,1,float16,fp8,0,0.09746080040931701
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,24,128,1,fp8,fp8,0,0.09647520184516907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,1,128,1,float16,float16,0,0.07818880081176757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,1,128,1,float16,fp8,0,0.0799120008945465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,1,128,1,fp8,fp8,0,0.07964479923248291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,2,128,1,float16,float16,0,0.0780896008014679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,2,128,1,fp8,fp8,0,0.08003680109977722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,4,128,1,float16,float16,0,0.08105760216712951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,4,128,1,float16,fp8,0,0.08018239736557006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,4,128,1,fp8,fp8,0,0.08015040159225464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,8,128,1,float16,float16,0,0.0860319972038269
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,8,128,1,float16,fp8,0,0.0801472008228302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,8,128,1,fp8,fp8,0,0.08012160062789916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,24,1,128,1,float16,fp8,0,0.1499951958656311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,24,128,1,float16,float16,0,0.055511999130249026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,24,128,1,float16,fp8,0,0.05338720083236694
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,24,128,1,fp8,fp8,0,0.053446400165557864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,1,128,1,float16,float16,0,0.04332799911499023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,1,128,1,float16,fp8,0,0.04508480131626129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,1,128,1,fp8,fp8,0,0.04508320093154907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,24,128,1,float16,float16,0,0.100547194480896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,2,128,1,float16,float16,0,0.04319039881229401
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,2,128,1,float16,fp8,0,0.04502240121364594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,2,128,1,fp8,fp8,0,0.04455839991569519
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,4,128,1,float16,float16,0,0.04352000057697296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,4,128,1,float16,fp8,0,0.04459680020809174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,4,128,1,fp8,fp8,0,0.04345119893550873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,8,128,1,float16,float16,0,0.04737440049648285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,8,128,1,float16,fp8,0,0.04327679872512817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,24,128,1,float16,float16,0,0.03094240128993988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,24,128,1,float16,fp8,0,0.030969598889350893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,24,128,1,fp8,fp8,0,0.030899199843406677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,1,128,1,float16,float16,0,0.026796799898147584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,1,128,1,float16,fp8,0,0.02677919864654541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,1,128,1,fp8,fp8,0,0.026943999528884887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,2,128,1,float16,float16,0,0.02685759961605072
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,2,128,1,float16,fp8,0,0.02682879865169525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,2,128,1,fp8,fp8,0,0.026800000667572023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,4,128,1,float16,float16,0,0.026833599805831908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,4,128,1,float16,fp8,0,0.02683840095996857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,4,128,1,fp8,fp8,0,0.026825600862503053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,8,128,1,float16,float16,0,0.028856000304222106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,8,128,1,float16,fp8,0,0.0268095999956131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,24,8,128,1,fp8,fp8,0,0.026811200380325317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,24,128,1,float16,float16,0,0.0186831995844841
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,24,128,1,float16,fp8,0,0.01870400011539459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,24,128,1,fp8,fp8,0,0.01870879977941513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,1,128,1,float16,float16,0,0.0176816001534462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,1,128,1,float16,fp8,0,0.01833759993314743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,24,2,128,1,float16,fp8,0,0.08009759783744812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,2,128,1,float16,float16,0,0.01839359998703003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,1,128,1,fp8,fp8,0,0.01855199933052063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,2,128,1,float16,fp8,0,0.017726400494575502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,24,8,128,1,fp8,fp8,0,0.04457600116729736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,2,128,1,fp8,fp8,0,0.018646399676799773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,4,128,1,float16,fp8,0,0.01720000058412552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,8,128,1,float16,float16,0,0.018639999628067016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,8,128,1,float16,fp8,0,0.017876799404621124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,8,128,1,fp8,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,24,128,1,float16,float16,0,0.014528000354766845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,24,128,1,float16,fp8,0,0.014496000111103058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,1,128,1,float16,float16,0,0.014371199905872345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,1,128,1,float16,fp8,0,0.01451359987258911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,1,128,1,fp8,fp8,0,0.014472000300884247
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,2,128,1,float16,float16,0,0.014452800154685974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,2,128,1,float16,fp8,0,0.014484800398349762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,2,128,1,fp8,fp8,0,0.014507199823856353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,4,128,1,float16,float16,0,0.014524799585342408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,4,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,4,128,1,fp8,fp8,0,0.014532800018787383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,8,128,1,float16,float16,0,0.014555199444293976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,8,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,8,128,1,fp8,fp8,0,0.014440000057220459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,24,128,1,float16,float16,0,0.012486399710178375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,24,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,24,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,1,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,1,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,1,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,2,128,1,float16,float16,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,2,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,2,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,4,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,4,128,1,float16,float16,0,0.018713599443435668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,24,4,128,1,fp8,fp8,0,0.018177600204944612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,4,128,1,fp8,fp8,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,8,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,8,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,8,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,24,24,128,1,fp8,fp8,0,0.014531199634075165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,24,128,1,float16,fp8,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,24,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,1,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,1,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,1,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,2,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,2,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,2,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,4,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,4,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,8,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,8,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,8,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,24,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,24,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,24,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,1,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,1,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,1,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,2,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,2,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,2,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,4,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,24,4,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,4,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,4,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,24,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,8,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,8,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,24,8,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,1,128,1,float16,float16,0,0.12483680248260498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,1,128,1,float16,fp8,0,0.12216800451278687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,24,4,128,1,float16,fp8,0,0.010564800351858139
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,2,128,1,float16,float16,0,0.123363196849823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,2,128,1,float16,fp8,0,0.12313280105590821
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,2,128,1,fp8,fp8,0,0.12314879894256592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,4,128,1,float16,float16,0,0.12662880420684813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,4,128,1,float16,fp8,0,0.12309600114822387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,4,128,1,fp8,fp8,0,0.12300800085067749
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,8,128,1,float16,float16,0,0.1296303987503052
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,8,128,1,float16,fp8,0,0.12311359643936157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,8,128,1,fp8,fp8,0,0.12302559614181519
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,24,128,1,float16,float16,0,0.07801439762115478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,24,128,1,float16,fp8,0,0.07394880056381226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,24,128,1,fp8,fp8,0,0.07411999702453613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,1,128,1,float16,float16,0,0.06577280163764954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,1,128,1,float16,fp8,0,0.0658079981803894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,1,128,1,fp8,fp8,0,0.0657696008682251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,2,128,1,float16,float16,0,0.06574079990386963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,2,128,1,float16,fp8,0,0.0657263994216919
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,2,128,1,fp8,fp8,0,0.06571679711341857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,4,128,1,float16,float16,0,0.06573759913444518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,4,128,1,float16,fp8,0,0.06576160192489625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,4,128,1,fp8,fp8,0,0.06570879817008972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,8,128,1,float16,float16,0,0.06975679993629455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,8,128,1,float16,fp8,0,0.06572319865226746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,24,8,128,1,fp8,fp8,0,0.06577119827270508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,24,128,1,float16,float16,0,0.04113920032978058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,24,128,1,float16,fp8,0,0.04121440052986145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,24,128,1,fp8,fp8,0,0.0410863995552063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,1,128,1,float16,float16,0,0.03834879994392395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,1,128,1,float16,fp8,0,0.03701440095901489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,2,128,1,float16,float16,0,0.037027201056480406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,24,1,128,1,fp8,fp8,0,0.12322239875793457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,2,128,1,float16,fp8,0,0.03704479932785034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,4,128,1,float16,float16,0,0.038894400000572205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,4,128,1,float16,fp8,0,0.03705439865589142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,4,128,1,fp8,fp8,0,0.03720960021018982
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,8,128,1,float16,float16,0,0.03908959925174713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,8,128,1,float16,fp8,0,0.03701759874820709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,8,128,1,fp8,fp8,0,0.03707999885082245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,24,128,1,float16,float16,0,0.02484800070524216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,24,128,1,float16,fp8,0,0.025028800964355467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,24,128,1,fp8,fp8,0,0.024878400564193725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,1,128,1,float16,fp8,0,0.022699199616909027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,1,128,1,fp8,fp8,0,0.022742399573326112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,2,128,1,float16,float16,0,0.022870400547981264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,2,128,1,float16,fp8,0,0.022737599909305573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,2,128,1,fp8,fp8,0,0.022753599286079406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,4,128,1,float16,float16,0,0.02470560073852539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,4,128,1,float16,fp8,0,0.02285760045051575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,4,128,1,fp8,fp8,0,0.022761599719524385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,8,128,1,float16,float16,0,0.02470560073852539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,8,128,1,float16,fp8,0,0.02287680059671402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,8,128,1,fp8,fp8,0,0.022759999334812164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,1,128,1,fp8,fp8,0,0.0371535986661911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,24,128,1,float16,float16,0,0.016547200083732606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,24,128,1,float16,fp8,0,0.016705599427223206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,24,128,1,fp8,fp8,0,0.016550399363040924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,1,128,1,float16,float16,0,0.016547200083732606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,1,128,1,float16,fp8,0,0.015719999372959138
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,1,128,1,fp8,fp8,0,0.016564799845218657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,2,128,1,float16,float16,0,0.01656319946050644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,24,2,128,1,fp8,fp8,0,0.03712800145149231
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,2,128,1,float16,fp8,0,0.016516800224781036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,2,128,1,fp8,fp8,0,0.01626240015029907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,4,128,1,float16,float16,0,0.016678400337696075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,4,128,1,float16,fp8,0,0.016579200327396394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,4,128,1,fp8,fp8,0,0.016553600132465363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,8,128,1,float16,float16,0,0.0166703999042511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,8,128,1,float16,fp8,0,0.016510400176048278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,24,128,1,float16,float16,0,0.014414399862289429
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,24,128,1,float16,fp8,0,0.014467200636863709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,24,128,1,fp8,fp8,0,0.012577599287033081
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,1,128,1,float16,float16,0,0.012548799812793731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,1,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,1,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,2,128,1,float16,float16,0,0.012611199915409089
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,2,128,1,float16,fp8,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,2,128,1,fp8,fp8,0,0.012567999958992004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,4,128,1,float16,float16,0,0.012748800218105316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,4,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,4,128,1,fp8,fp8,0,0.012683199346065521
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,24,1,128,1,float16,float16,0,0.024897600710391998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,8,128,1,fp8,fp8,0,0.012449599802494049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,24,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,24,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,24,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,1,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,1,128,1,float16,fp8,0,0.010339199751615524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,1,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,2,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,2,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,24,8,128,1,fp8,fp8,0,0.016633599996566772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,2,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,4,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,4,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,4,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,8,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,8,128,1,float16,float16,0,0.012571200728416443
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,8,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,24,8,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,24,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,24,128,1,float16,fp8,0,0.010326399654150008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,24,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,1,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,1,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,1,128,1,fp8,fp8,0,0.010276799649000167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,2,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,2,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,4,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,4,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,4,128,1,fp8,fp8,0,0.010294400155544281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,8,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,8,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,8,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,24,128,1,float16,float16,0,0.010342399775981902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,24,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,24,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,1,128,1,float16,float16,0,0.010251200199127198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,1,128,1,float16,fp8,0,0.010364799946546554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,2,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,2,128,1,float16,fp8,0,0.010328000038862228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,2,128,1,fp8,fp8,0,0.010299199819564819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,4,128,1,float16,float16,0,0.010273600369691849
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,4,128,1,float16,fp8,0,0.010311999917030334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,8,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,4,128,1,fp8,fp8,0,0.008590400218963623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,24,8,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,8,128,1,float16,fp8,0,0.010320000350475311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,24,8,128,1,fp8,fp8,0,0.00862400010228157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,1,128,1,float16,float16,0,0.11648639440536498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,1,128,1,float16,fp8,0,0.11092480421066284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,1,128,1,fp8,fp8,0,0.11118240356445312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,2,128,1,float16,float16,0,0.11653439998626709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,2,128,1,float16,fp8,0,0.1119488000869751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,24,2,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,2,128,1,fp8,fp8,0,0.11111680269241334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,4,128,1,float16,fp8,0,0.11266880035400391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,4,128,1,fp8,fp8,0,0.11161760091781617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,8,128,1,float16,float16,0,0.12130240201950074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,8,128,1,float16,fp8,0,0.11150879859924316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,24,128,1,float16,float16,0,0.06791200041770935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,24,128,1,float16,fp8,0,0.06407359838485718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,24,128,1,fp8,fp8,0,0.06383200287818909
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,1,128,1,float16,float16,0,0.06368479728698731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,1,128,1,fp8,fp8,0,0.05959039926528931
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,2,128,1,float16,float16,0,0.06376960277557372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,2,128,1,float16,fp8,0,0.059729599952697755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,2,128,1,fp8,fp8,0,0.059910398721694944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,4,128,1,float16,float16,0,0.06373599767684937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,4,128,1,float16,fp8,0,0.06012480258941651
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,4,128,1,fp8,fp8,0,0.05975679755210876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,8,128,1,float16,float16,0,0.06524639725685119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,8,128,1,float16,fp8,0,0.0599120020866394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,8,128,1,fp8,fp8,0,0.0600928008556366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,24,128,1,float16,float16,0,0.03727520108222961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,24,128,1,float16,fp8,0,0.03704479932785034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,24,128,1,fp8,fp8,0,0.03709760010242462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,1,128,1,float16,float16,0,0.03541919887065888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,1,128,1,float16,fp8,0,0.03496800065040588
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,1,128,1,fp8,fp8,0,0.035175999999046324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,2,128,1,float16,float16,0,0.03705439865589142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,8,128,1,fp8,fp8,0,0.11123039722442626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,2,128,1,fp8,fp8,0,0.03497920036315918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,4,128,1,float16,float16,0,0.03590559959411621
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,4,128,1,float16,fp8,0,0.03498719930648804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,24,1,128,1,float16,fp8,0,0.0599839985370636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,4,128,1,fp8,fp8,0,0.03499200046062469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,8,128,1,float16,float16,0,0.03753759860992432
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,8,128,1,float16,fp8,0,0.035025599598884585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,8,128,1,fp8,fp8,0,0.034964799880981445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,24,128,1,float16,float16,0,0.022742399573326112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,24,128,1,float16,fp8,0,0.022785599529743194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,24,128,1,fp8,fp8,0,0.02276480048894882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,1,128,1,float16,float16,0,0.022711999714374542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,1,128,1,float16,fp8,0,0.02266560047864914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,1,128,1,fp8,fp8,0,0.02268480062484741
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,2,128,1,float16,float16,0,0.022716799378395082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,2,128,1,float16,fp8,0,0.022732800245285033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,2,128,1,fp8,fp8,0,0.02272160053253174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,4,128,1,float16,float16,0,0.022705599665641785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,4,128,1,float16,fp8,0,0.022694399952888487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,4,128,1,fp8,fp8,0,0.022511999309062957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,8,128,1,float16,float16,0,0.022758400440216063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,8,128,1,float16,fp8,0,0.022735999524593355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,24,8,128,1,fp8,fp8,0,0.022593599557876588
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,24,128,1,float16,float16,0,0.016551999747753142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,24,128,1,float16,fp8,0,0.014641599357128143
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,24,128,1,fp8,fp8,0,0.016569599509239197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,1,128,1,float16,float16,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,1,128,1,float16,fp8,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,1,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,2,128,1,float16,float16,0,0.014534400403499603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,2,128,1,float16,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,2,128,1,fp8,fp8,0,0.014524799585342408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,4,128,1,float16,float16,0,0.016564799845218657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,24,4,128,1,float16,float16,0,0.11693600416183472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,4,128,1,float16,fp8,0,0.014587199687957764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,4,128,1,fp8,fp8,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,8,128,1,float16,float16,0,0.016582399606704712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,8,128,1,float16,fp8,0,0.014668799936771393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,24,8,128,1,fp8,fp8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,24,128,1,float16,float16,0,0.012865599989891053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,24,2,128,1,float16,fp8,0,0.03492160141468048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,24,128,1,float16,fp8,0,0.012449599802494049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,1,128,1,float16,float16,0,0.012561599910259246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,1,128,1,fp8,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,2,128,1,float16,float16,0,0.012582400441169738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,2,128,1,float16,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,2,128,1,fp8,fp8,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,4,128,1,float16,float16,0,0.012596799433231354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,4,128,1,float16,fp8,0,0.012399999797344208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,4,128,1,fp8,fp8,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,8,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,8,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,8,128,1,fp8,fp8,0,0.012566399574279786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,24,128,1,float16,float16,0,0.010628800094127654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,24,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,24,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,1,128,1,float16,float16,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,1,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,1,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,2,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,2,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,2,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,4,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,4,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,4,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,8,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,8,128,1,float16,fp8,0,0.010284800082445145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,24,8,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,24,128,1,float16,float16,0,0.010609599947929382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,24,128,1,float16,fp8,0,0.010316800326108932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,24,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,1,128,1,float16,float16,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,1,128,1,fp8,fp8,0,0.009300799667835235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,2,128,1,float16,float16,0,0.009864000231027603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,2,128,1,float16,fp8,0,0.008582399785518646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,2,128,1,fp8,fp8,0,0.008627200126647949
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,4,128,1,float16,float16,0,0.00965920016169548
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,4,128,1,float16,fp8,0,0.009619200229644775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,4,128,1,fp8,fp8,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,8,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,8,128,1,float16,fp8,0,0.009759999811649323
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,8,128,1,fp8,fp8,0,0.00979200005531311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,24,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,24,128,1,float16,fp8,0,0.009646400064229965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,24,128,1,fp8,fp8,0,0.009548799693584442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,1,128,1,float16,float16,0,0.008502399921417237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,1,128,1,float16,fp8,0,0.008475200086832047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,24,128,1,fp8,fp8,0,0.012665599584579468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,1,128,1,fp8,fp8,0,0.008459199965000153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,24,1,128,1,float16,fp8,0,0.01247519999742508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,2,128,1,float16,float16,0,0.010364799946546554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,2,128,1,float16,fp8,0,0.008436799794435502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,4,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,4,128,1,float16,fp8,0,0.008523199707269669
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,4,128,1,fp8,fp8,0,0.009160000085830688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,8,128,1,float16,float16,0,0.008489599823951722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,8,128,1,float16,fp8,0,0.008979199826717377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,8,128,1,fp8,fp8,0,0.008500800281763077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,24,1,128,1,float16,fp8,0,0.009487999975681305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,1,128,1,float16,fp8,0,0.10673439502716064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,1,128,1,float16,float16,0,0.11503839492797852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,2,128,1,float16,fp8,0,0.10679680109024048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,1,128,1,fp8,fp8,0,0.10679199695587158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,2,128,1,float16,float16,0,0.11684800386428833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,2,128,1,fp8,fp8,0,0.10687999725341797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,4,128,1,float16,float16,0,0.11501439809799194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,4,128,1,float16,fp8,0,0.10672320127487182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,4,128,1,fp8,fp8,0,0.10682400465011596
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,8,128,1,float16,float16,0,0.11518559455871583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,8,128,1,float16,fp8,0,0.1067088007926941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,0,0.06360160112380982
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,0,0.058392000198364255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,24,128,1,fp8,fp8,0,0.05953599810600281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,1,128,1,float16,float16,0,0.06328960061073304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,1,128,1,float16,fp8,0,0.0582319974899292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,1,128,1,fp8,fp8,0,0.05758559703826904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,2,128,1,float16,float16,0,0.06366400122642517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,2,128,1,float16,fp8,0,0.05767199993133545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,2,128,1,fp8,fp8,0,0.05892159938812256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,4,128,1,float16,float16,0,0.06368160247802734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,4,128,1,float16,fp8,0,0.0587552011013031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,4,128,1,fp8,fp8,0,0.0576911985874176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,8,128,1,float16,float16,0,0.06351839900016784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,8,128,1,float16,fp8,0,0.05759360194206238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,24,8,128,1,fp8,fp8,0,0.05915840268135071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,0,0.03517920076847077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,0,0.03442400097846985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,24,128,1,fp8,fp8,0,0.033020800352096556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,1,128,1,float16,float16,0,0.035097599029541016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,1,128,1,float16,fp8,0,0.03392800092697144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,1,128,1,fp8,fp8,0,0.034052801132202146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,24,2,128,1,fp8,fp8,0,0.008500800281763077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,2,128,1,float16,fp8,0,0.033000001311302186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,2,128,1,float16,float16,0,0.03513599932193756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,2,128,1,fp8,fp8,0,0.032927998900413515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,4,128,1,float16,fp8,0,0.033051198720932005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,4,128,1,fp8,fp8,0,0.03299199938774109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,8,128,1,float16,float16,0,0.035016000270843506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,8,128,1,float16,fp8,0,0.03293600082397461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,8,128,1,fp8,fp8,0,0.032996800541877744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,0,0.022708800435066224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,0,0.02146719992160797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,24,8,128,1,fp8,fp8,0,0.10682079792022706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,24,128,1,fp8,fp8,0,0.02120320051908493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,1,128,1,float16,float16,0,0.02284799963235855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,1,128,1,float16,fp8,0,0.020769600570201874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,1,128,1,fp8,fp8,0,0.02258239984512329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,2,128,1,float16,float16,0,0.0226623997092247
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,2,128,1,float16,fp8,0,0.022679999470710754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,2,128,1,fp8,fp8,0,0.020769600570201874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,4,128,1,float16,float16,0,0.02269600033760071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,4,128,1,float16,fp8,0,0.020759999752044678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,4,128,1,fp8,fp8,0,0.02090719938278198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,8,128,1,float16,float16,0,0.022673599421977997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,8,128,1,float16,fp8,0,0.021556800603866576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,24,8,128,1,fp8,fp8,0,0.020656000077724456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,0,0.016516800224781036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,0,0.01465280055999756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,24,128,1,fp8,fp8,0,0.014643199741840363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,1,128,1,float16,float16,0,0.016155199706554414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,1,128,1,float16,fp8,0,0.01467359960079193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,1,128,1,fp8,fp8,0,0.014601600170135499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,2,128,1,float16,float16,0,0.016550399363040924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,2,128,1,float16,fp8,0,0.014523200690746307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,2,128,1,fp8,fp8,0,0.014596800506114959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,4,128,1,float16,float16,0,0.016473600268363954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,4,128,1,float16,fp8,0,0.014601600170135499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,8,128,1,float16,float16,0,0.01656319946050644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,8,128,1,float16,fp8,0,0.014636799693107605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,24,4,128,1,float16,float16,0,0.035132798552513125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,0,0.01247519999742508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,24,128,1,fp8,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,1,128,1,float16,float16,0,0.012654399871826172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,1,128,1,float16,fp8,0,0.01241919994354248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,1,128,1,fp8,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,2,128,1,float16,float16,0,0.014164799451828003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,2,128,1,float16,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,2,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,4,128,1,float16,float16,0,0.014435200393199921
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,4,128,1,fp8,fp8,0,0.012457600235939026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,8,128,1,float16,float16,0,0.013875199854373932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,8,128,1,float16,fp8,0,0.012417600303888322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,8,128,1,fp8,fp8,0,0.01239359974861145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,24,128,1,fp8,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,1,128,1,float16,float16,0,0.010567999631166457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,1,128,1,float16,fp8,0,0.010329599678516387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,1,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,2,128,1,float16,float16,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,4,128,1,fp8,fp8,0,0.014497600495815277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,2,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,2,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,4,128,1,float16,float16,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,4,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,4,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,24,8,128,1,fp8,fp8,0,0.014588800072669984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,8,128,1,float16,fp8,0,0.010313600301742554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,8,128,1,fp8,fp8,0,0.010251200199127198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,0,0.010075200349092484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,24,128,1,fp8,fp8,0,0.010308799892663955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,1,128,1,float16,float16,0,0.010355199873447418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,1,128,1,float16,fp8,0,0.010278400033712387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,1,128,1,fp8,fp8,0,0.009369599819183349
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,2,128,1,float16,float16,0,0.010328000038862228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,2,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,2,128,1,fp8,fp8,0,0.010264000296592713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,4,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,4,128,1,fp8,fp8,0,0.010335999727249145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,8,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,8,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,8,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,24,4,128,1,float16,fp8,0,0.012639999389648438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,24,128,1,fp8,fp8,0,0.008515200018882752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,1,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,1,128,1,float16,fp8,0,0.00860000029206276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,1,128,1,fp8,fp8,0,0.010315199941396713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,2,128,1,float16,fp8,0,0.01032319962978363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,2,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,4,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,4,128,1,float16,fp8,0,0.008803199976682663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,4,128,1,fp8,fp8,0,0.008449599891901017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,8,128,1,float16,float16,0,0.01034879982471466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,8,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,24,8,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,8,128,1,fp8,fp8,0,0.008403199911117553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,24,4,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,24,2,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,fp8,0,3.9688495635986327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,float16,0,5.016704177856445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,1,128,1,fp8,fp8,0,3.9244094848632813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,fp8,0,3.983270263671875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,2,128,1,fp8,fp8,0,3.9160289764404297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,float16,0,5.1531024932861325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,float16,0,5.040478515625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,fp8,0,3.989654541015625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,4,128,1,fp8,fp8,0,3.936054229736328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,fp8,0,3.9702049255371095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,float16,0,5.628180694580078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,16,8,128,1,fp8,fp8,0,3.969124984741211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,float16,0,2.4410816192626954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,fp8,0,2.0745744705200195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,16,128,1,fp8,fp8,0,2.0856800079345703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,fp8,0,2.169478416442871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,1,128,1,fp8,fp8,0,2.0180431365966798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,float16,0,2.5279279708862306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,fp8,0,1.9975008010864257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,2,128,1,fp8,fp8,0,2.014044761657715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,fp8,0,2.078508758544922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,float16,0,2.676003265380859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,4,128,1,fp8,fp8,0,2.028988838195801
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,float16,0,2.5010000228881837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,fp8,0,2.2524591445922852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,8,128,1,fp8,fp8,0,2.0423616409301757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,fp8,0,1.1018431663513184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,16,128,1,fp8,fp8,0,1.086511993408203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,float16,0,1.289132785797119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,fp8,0,1.0402848243713378
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,1,128,1,fp8,fp8,0,1.1391728401184082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,float16,0,1.2605615615844727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,fp8,0,1.0930751800537108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,2,128,1,fp8,fp8,0,1.144559955596924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,float16,0,1.3032015800476073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,4,128,1,fp8,fp8,0,1.1490880012512208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,float16,0,1.328495979309082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,fp8,0,1.044051170349121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,fp8,0,1.1683712005615234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,8,128,1,fp8,fp8,0,1.04760799407959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,fp8,0,0.592307186126709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,16,128,1,fp8,fp8,0,0.5890096187591553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,float16,0,0.6734879970550537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,fp8,0,0.5650640010833741
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,1,128,1,fp8,fp8,0,0.5643504142761231
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,float16,0,0.673956823348999
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,fp8,0,0.5639647960662841
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,2,128,1,fp8,fp8,0,0.5651472091674805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,float16,0,0.6786752223968506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,fp8,0,0.5658239841461181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,4,128,1,fp8,fp8,0,0.5656559944152832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,float16,0,0.6999599933624268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,fp8,0,0.567568016052246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,8,128,1,fp8,fp8,0,0.5669280052185058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,float16,0,2.8018592834472655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,fp8,0,2.7231071472167967
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,1,128,1,fp8,fp8,0,2.3099632263183594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,float16,0,2.8676624298095703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,fp8,0,2.554068756103516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,2,128,1,fp8,fp8,0,2.3109952926635744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,float16,0,2.850387191772461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,fp8,0,2.533518409729004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,4,128,1,fp8,fp8,0,2.313747215270996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,float16,0,2.9233375549316407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,fp8,0,2.4447343826293944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,16,8,128,1,fp8,fp8,0,2.367299270629883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,float16,0,1.5446800231933593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,float16,0,0.740015983581543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,float16,0,2.5492687225341797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,float16,0,1.3237855911254883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,fp8,0,1.3449664115905762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,16,128,1,fp8,fp8,0,1.2486783981323242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,float16,0,1.423481559753418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,fp8,0,1.2506095886230468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,1,128,1,fp8,fp8,0,1.184398365020752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,float16,0,1.455395221710205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,fp8,0,1.2979056358337402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,2,128,1,fp8,fp8,0,1.229196834564209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,float16,0,1.4395631790161132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,fp8,0,1.209059238433838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,4,128,1,fp8,fp8,0,1.206710433959961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,float16,0,1.4471936225891113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,fp8,0,1.3748944282531739
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,float16,0,0.8058112144470215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,16,8,128,1,fp8,fp8,0,1.2002960205078126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,fp8,0,0.6677023887634277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,16,128,1,fp8,fp8,0,0.7020304203033447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,float16,0,0.7321135997772217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,fp8,0,0.6323520183563233
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,1,128,1,fp8,fp8,0,0.6782639980316162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,float16,0,0.739628791809082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,fp8,0,0.6255551815032959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,2,128,1,fp8,fp8,0,0.6400527954101562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,fp8,0,0.6511472225189209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,float16,0,0.7679567813873291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,4,128,1,fp8,fp8,0,0.644268798828125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,float16,0,0.79202241897583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,fp8,0,0.6563392162322998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,float16,0,0.45181121826171877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,fp8,0,0.3668783903121948
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,16,128,1,fp8,fp8,0,0.3700416088104248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,float16,0,0.4027391910552979
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,fp8,0,0.34876160621643065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,1,128,1,fp8,fp8,0,0.3464384078979492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,float16,0,0.41223998069763185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,fp8,0,0.3465967893600464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,2,128,1,fp8,fp8,0,0.3481487989425659
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,float16,0,0.4099440097808838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,fp8,0,0.34831039905548095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,4,128,1,fp8,fp8,0,0.3476736068725586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,float16,0,0.42876319885253905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,fp8,0,0.35253760814666746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,16,8,128,1,fp8,fp8,0,0.3510735988616943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,16,8,128,1,fp8,fp8,0,0.63056321144104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,fp8,0,1.6611967086791992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,float16,0,1.9612720489501954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,1,128,1,fp8,fp8,0,1.6530496597290039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,2,128,1,fp8,fp8,0,1.6550016403198242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,fp8,0,1.7307071685791016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,float16,0,2.021286392211914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,float16,0,1.9746831893920898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,fp8,0,1.6607328414916993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,4,128,1,fp8,fp8,0,1.6611007690429687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,fp8,0,1.8105199813842774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,float16,0,2.24139518737793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,float16,0,1.1303695678710937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,16,8,128,1,fp8,fp8,0,1.6654815673828125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,fp8,0,0.908784008026123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,16,128,1,fp8,fp8,0,1.0015215873718262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,float16,0,1.015552043914795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,fp8,0,0.8579903602600097
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,1,128,1,fp8,fp8,0,0.8573360443115234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,float16,0,1.0365663528442384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,fp8,0,0.92457275390625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,2,128,1,fp8,fp8,0,0.860318374633789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,float16,0,1.016320037841797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,fp8,0,0.9623135566711426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,4,128,1,fp8,fp8,0,0.9046976089477539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,float16,0,1.05381441116333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,fp8,0,0.8644783973693848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,float16,0,0.6034543991088868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,16,8,128,1,fp8,fp8,0,0.8981552124023438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,fp8,0,0.5301680088043212
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,fp8,0,0.4602928161621094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,float16,0,0.5368319988250733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,1,128,1,fp8,fp8,0,0.49979357719421386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,fp8,0,0.4612592220306396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,float16,0,0.5468976020812988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,2,128,1,fp8,fp8,0,0.4967663764953613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,fp8,0,0.46077919006347656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,4,128,1,fp8,fp8,0,0.459003210067749
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,float16,0,0.5734384059906006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,fp8,0,0.4614895820617676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,8,128,1,fp8,fp8,0,0.4640048027038574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,float16,0,0.33071041107177734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,fp8,0,0.275272011756897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,16,128,1,fp8,fp8,0,0.27408480644226074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,float16,0,0.29883038997650146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,fp8,0,0.2568831920623779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,16,128,1,fp8,fp8,0,0.48659358024597166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,1,128,1,fp8,fp8,0,0.2586591958999634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,float16,0,0.2919136047363281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,fp8,0,0.25727200508117676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,2,128,1,fp8,fp8,0,0.2583600044250488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,float16,0,0.29814560413360597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,fp8,0,0.2579888105392456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,4,128,1,fp8,fp8,0,0.2586735963821411
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,fp8,0,0.2583280086517334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,8,128,1,fp8,fp8,0,0.2601344108581543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,float16,0,0.5473536014556885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,fp8,0,2.196137619018555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,1,128,1,fp8,fp8,0,2.178363227844238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,float16,0,0.30731840133666993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,fp8,0,2.1862287521362305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,float16,0,2.6519983291625975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,2,128,1,fp8,fp8,0,2.1797103881835938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,float16,0,2.507107162475586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,float16,0,2.6442592620849608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,fp8,0,2.429648017883301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,4,128,1,fp8,fp8,0,2.1856063842773437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,float16,0,2.676255989074707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,float16,0,1.4818943977355956
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,fp8,0,2.290166473388672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,16,8,128,1,fp8,fp8,0,2.3064191818237303
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,fp8,0,1.360860824584961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,16,128,1,fp8,fp8,0,1.2674511909484862
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,fp8,0,1.1315471649169921
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,1,128,1,fp8,fp8,0,1.1162272453308106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,float16,0,1.2760224342346191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,2,128,1,fp8,fp8,0,1.1141440391540527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,float16,0,1.3192048072814941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,fp8,0,1.1157903671264648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,float16,0,1.2981391906738282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,4,128,1,fp8,fp8,0,1.1716863632202148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,float16,0,1.368336009979248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,fp8,0,1.1150192260742187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,fp8,0,1.1224399566650392
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,16,8,128,1,fp8,fp8,0,1.1216704368591308
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,float16,0,0.769817590713501
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,fp8,0,0.6246335983276368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,16,128,1,fp8,fp8,0,0.6233791828155517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,float16,0,0.671772813796997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,fp8,0,0.5831200122833252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,1,128,1,fp8,fp8,0,0.5825615882873535
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,float16,0,0.6744351863861084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,fp8,0,0.5836143970489502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,2,128,1,fp8,fp8,0,0.5822383880615234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,float16,0,0.6888895988464355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,fp8,0,0.5844016075134277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,4,128,1,fp8,fp8,0,0.5835904121398926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,float16,0,0.712665605545044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,fp8,0,0.5880959987640381
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,16,8,128,1,fp8,fp8,0,0.587775993347168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,fp8,0,0.3388207912445068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,16,128,1,fp8,fp8,0,0.33860480785369873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,float16,0,0.36306400299072267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,fp8,0,0.31559200286865235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,1,128,1,fp8,fp8,0,0.3184000015258789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,float16,0,0.3581631898880005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,fp8,0,0.31911840438842776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,2,128,1,fp8,fp8,0,0.31579198837280276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,float16,0,0.3728543996810913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,fp8,0,0.3168960094451904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,4,128,1,fp8,fp8,0,0.3183248043060303
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,float16,0,0.38188800811767576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,fp8,0,0.31977119445800783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,float16,0,0.23475680351257325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,8,128,1,fp8,fp8,0,0.32010879516601565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,fp8,0,0.19406239986419677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,16,128,1,fp8,fp8,0,0.19523040056228638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,float16,0,0.2083120107650757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,fp8,0,0.18108479976654052
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,1,128,1,fp8,fp8,0,0.18252960443496705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,fp8,0,0.18251839876174927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,2,128,1,fp8,fp8,0,0.1825055956840515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,float16,0,0.4138175964355469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,float16,0,0.20964159965515136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,fp8,0,0.18257280588150024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,4,128,1,fp8,fp8,0,0.18247840404510499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,float16,0,0.2180191993713379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,fp8,0,0.1838047981262207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,fp8,0,1.316335964202881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,float16,0,1.4967791557312011
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,1,128,1,fp8,fp8,0,1.3176959991455077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,float16,0,0.20811200141906738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,float16,0,1.5277888298034668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,16,8,128,1,fp8,fp8,0,0.18250399827957153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,fp8,0,1.315065574645996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,2,128,1,fp8,fp8,0,1.3201552391052247
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,float16,0,1.634881591796875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,fp8,0,1.31671199798584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,4,128,1,fp8,fp8,0,1.3204496383666993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,float16,0,1.6570320129394531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,fp8,0,1.495680046081543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,16,8,128,1,fp8,fp8,0,1.3243071556091308
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,float16,0,0.9062319755554199
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,fp8,0,0.7481999874114991
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,16,128,1,fp8,fp8,0,0.7445375919342041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,float16,0,0.7568175792694092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,fp8,0,0.7038112163543702
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,1,128,1,fp8,fp8,0,0.6800000190734863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,float16,0,0.7778351783752442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,fp8,0,0.6823376178741455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,2,128,1,fp8,fp8,0,0.6777503967285157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,float16,0,0.8074383735656738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,fp8,0,0.6828495979309082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,4,128,1,fp8,fp8,0,0.6795152187347412
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,float16,0,0.8234288215637207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,fp8,0,0.6890304088592529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,float16,0,0.475435209274292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,fp8,0,0.3914207935333252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,16,8,128,1,fp8,fp8,0,0.6835968017578125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,16,128,1,fp8,fp8,0,0.39290080070495603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,float16,0,0.4001296043395996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,fp8,0,0.358571195602417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,1,128,1,fp8,fp8,0,0.3590735912322998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,float16,0,0.4143519878387451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,fp8,0,0.36353440284729005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,2,128,1,fp8,fp8,0,0.3592736005783081
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,float16,0,0.419382381439209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,4,128,1,fp8,fp8,0,0.359935998916626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,float16,0,0.4333487987518311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,fp8,0,0.36562559604644773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,float16,0,0.26224000453948976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,fp8,0,0.22006080150604249
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,16,128,1,fp8,fp8,0,0.21728320121765138
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,float16,0,0.22217280864715577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,fp8,0,0.19886399507522584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,1,128,1,fp8,fp8,0,0.1990224003791809
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,float16,0,0.22179200649261474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,fp8,0,0.19908640384674073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,2,128,1,fp8,fp8,0,0.19935519695281984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,float16,0,0.2273616075515747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,fp8,0,0.19914079904556276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,4,128,1,fp8,fp8,0,0.20036640167236328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,fp8,0,0.20205121040344237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,float16,0,0.2389904022216797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,16,8,128,1,fp8,fp8,0,0.2009455919265747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,float16,0,0.1510159969329834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,fp8,0,0.12528159618377685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,16,128,1,fp8,fp8,0,0.1252112030982971
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,fp8,0,0.3640944004058838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,float16,0,0.1280608057975769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,fp8,0,0.11915680170059204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,16,8,128,1,fp8,fp8,0,0.362825608253479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,fp8,0,0.1189120054244995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,2,128,1,fp8,fp8,0,0.11904000043869019
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,float16,0,0.12938079833984376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,fp8,0,0.11757440567016601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,4,128,1,fp8,fp8,0,0.11896320581436157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,float16,0,0.13379199504852296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,fp8,0,0.11900479793548584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,8,128,1,fp8,fp8,0,0.11899039745330811
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,1,128,1,fp8,fp8,0,0.11730719804763794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,float16,0,1.4025312423706056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,float16,0,0.12998559474945068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,fp8,0,1.2947168350219727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,1,128,1,fp8,fp8,0,1.4255215644836425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,float16,0,1.4567008018493652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,fp8,0,1.2980287551879883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,2,128,1,fp8,fp8,0,1.2961376190185547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,float16,0,1.4781439781188965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,fp8,0,1.3146896362304688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,4,128,1,fp8,fp8,0,1.3013487815856934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,float16,0,1.5674799919128417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,fp8,0,1.311569595336914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,16,8,128,1,fp8,fp8,0,1.3152527809143066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,float16,0,0.9157919883728027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,fp8,0,0.7520016193389892
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,16,128,1,fp8,fp8,0,0.7474864006042481
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,float16,0,0.7383440017700196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,fp8,0,0.6639823913574219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,1,128,1,fp8,fp8,0,0.6983935832977295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,fp8,0,0.6751359939575196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,2,128,1,fp8,fp8,0,0.6646687984466553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,float16,0,0.7476975917816162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,fp8,0,0.6684591770172119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,4,128,1,fp8,fp8,0,0.6734992027282715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,fp8,0,0.6710656166076661
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,float16,0,0.8057680130004883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,8,128,1,fp8,fp8,0,0.6715472221374512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,float16,0,0.47761921882629393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,16,128,1,fp8,fp8,0,0.3894128084182739
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,float16,0,0.3830176115036011
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,fp8,0,0.34673120975494387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,1,128,1,fp8,fp8,0,0.34842240810394287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,float16,0,0.3803872108459473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,fp8,0,0.34861440658569337
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,float16,0,0.7240928173065185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,2,128,1,fp8,fp8,0,0.3463871955871582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,float16,0,0.4016895771026611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,fp8,0,0.34822878837585447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,4,128,1,fp8,fp8,0,0.34851999282836915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,fp8,0,0.3507776021957397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,float16,0,0.42560639381408694
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,8,128,1,fp8,fp8,0,0.35222721099853516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,float16,0,0.25251359939575196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,fp8,0,0.2124687910079956
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,16,128,1,fp8,fp8,0,0.2112272024154663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,float16,0,0.2048543930053711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,fp8,0,0.18833919763565063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,1,128,1,fp8,fp8,0,0.18823039531707764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,float16,0,0.20409278869628905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,fp8,0,0.1891600012779236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,2,128,1,fp8,fp8,0,0.18857920169830322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,fp8,0,0.3923856019973755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,float16,0,0.21222081184387206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,4,128,1,fp8,fp8,0,0.1890463948249817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,float16,0,0.22567360401153563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,fp8,0,0.19090240001678466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,8,128,1,fp8,fp8,0,0.19110239744186402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,float16,0,0.14211519956588745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,16,128,1,fp8,fp8,0,0.12001919746398926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,float16,0,0.11492160558700562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,fp8,0,0.10879039764404297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,1,128,1,fp8,fp8,0,0.10878080129623413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,float16,0,0.11627039909362794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,fp8,0,0.10884480476379395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,2,128,1,fp8,fp8,0,0.10874719619750976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,float16,0,0.11857600212097168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,fp8,0,0.10875999927520752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,4,128,1,fp8,fp8,0,0.10884640216827393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,float16,0,0.12654880285263062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,8,128,1,fp8,fp8,0,0.10886399745941162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,float16,0,0.08222079873085023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,fp8,0,0.07400320172309875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,16,128,1,fp8,fp8,0,0.07395359873771667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,float16,0,0.07574080228805542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,fp8,0,0.06983680129051209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,1,128,1,fp8,fp8,0,0.0697488009929657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,float16,0,0.07523040175437927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,fp8,0,0.06983839869499206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,2,128,1,fp8,fp8,0,0.06978880167007447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,float16,0,0.07595679759979249
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,fp8,0,0.12105920314788818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,fp8,0,0.06986879706382751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,4,128,1,fp8,fp8,0,0.06989920139312744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,float16,0,0.07801120281219483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,fp8,0,0.06994879841804505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,16,8,128,1,fp8,fp8,0,0.06984800100326538
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,fp8,0,0.10887199640274048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,float16,0,0.8484335899353027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,fp8,0,0.8089232444763184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,1,128,1,fp8,fp8,0,0.8463024139404297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,fp8,0,0.1902608036994934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,float16,0,0.8542896270751953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,fp8,0,0.8406720161437988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,2,128,1,fp8,fp8,0,0.8126864433288574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,float16,0,0.8942192077636719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,fp8,0,0.8121135711669922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,4,128,1,fp8,fp8,0,0.8145024299621582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,float16,0,0.9953344345092774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,fp8,0,0.8183744430541993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,16,8,128,1,fp8,fp8,0,0.8209232330322266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,float16,0,0.5848447799682617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,fp8,0,0.47809758186340334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,16,128,1,fp8,fp8,0,0.47893438339233396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,float16,0,0.45060482025146487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,fp8,0,0.42226080894470214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,1,128,1,fp8,fp8,0,0.4178944110870361
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,float16,0,0.4465007781982422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,fp8,0,0.4198575973510742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,2,128,1,fp8,fp8,0,0.4177855968475342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,float16,0,0.4611824035644531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,fp8,0,0.41995677947998045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,4,128,1,fp8,fp8,0,0.421779203414917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,float16,0,0.49967198371887206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,fp8,0,0.422822380065918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,float16,0,0.304366397857666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,16,8,128,1,fp8,fp8,0,0.42314720153808594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,fp8,0,0.2522320032119751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,16,128,1,fp8,fp8,0,0.252459192276001
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,float16,0,0.23832640647888184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,fp8,0,0.22201600074768066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,1,128,1,fp8,fp8,0,0.2213887929916382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,float16,0,0.23383519649505616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,fp8,0,0.22345919609069825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,2,128,1,fp8,fp8,0,0.2229167938232422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,fp8,0,0.2222304105758667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,4,128,1,fp8,fp8,0,0.22353758811950683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,fp8,0,0.2251120090484619
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,float16,0,0.2665312051773071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,8,128,1,fp8,fp8,0,0.225816011428833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,float16,0,0.16472480297088624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,fp8,0,0.13969600200653076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,16,128,1,fp8,fp8,0,0.14061599969863892
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,float16,0,0.12825759649276733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,fp8,0,0.1214095950126648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,1,128,1,fp8,fp8,0,0.12320319414138795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,fp8,0,0.12219359874725341
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,2,128,1,fp8,fp8,0,0.12319200038909912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,float16,0,0.13420480489730835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,fp8,0,0.1230847954750061
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,4,128,1,fp8,fp8,0,0.12309119701385499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,float16,0,0.1485743999481201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,fp8,0,0.12519840002059937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,float16,0,0.09468960165977477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,fp8,0,0.08023040294647217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,16,128,1,fp8,fp8,0,0.0801807999610901
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,float16,0,0.07617120146751404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,fp8,0,0.07300959825515747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,1,128,1,fp8,fp8,0,0.07205600142478943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,float16,0,0.0760479986667633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,float16,0,0.2452847957611084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,fp8,0,0.07349920272827148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,2,128,1,fp8,fp8,0,0.07377920150756836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,fp8,0,0.073716801404953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,4,128,1,fp8,fp8,0,0.07294399738311767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,float16,0,0.08202559947967529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,fp8,0,0.07299519777297973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,8,128,1,fp8,fp8,0,0.07202720046043395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,float16,0,0.05523359775543213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,fp8,0,0.051534402370452884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,16,128,1,fp8,fp8,0,0.05140320062637329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,float16,0,0.05139039754867554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,fp8,0,0.04780319929122925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,8,128,1,fp8,fp8,0,0.1252063989639282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,1,128,1,fp8,fp8,0,0.048695999383926394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,float16,0,0.05151039958000183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,fp8,0,0.04928480088710785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,2,128,1,fp8,fp8,0,0.04917120039463043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,float16,0,0.052718400955200195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,fp8,0,0.048977598547935486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,4,128,1,fp8,fp8,0,0.04772000014781952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,float16,0,0.053801602125167845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,fp8,0,0.04831840097904205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,16,8,128,1,fp8,fp8,0,0.049296000599861146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,float16,0,0.844324779510498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,fp8,0,0.8460800170898437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,float16,0,0.07825760245323181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,1,128,1,fp8,fp8,0,0.8440303802490234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,float16,0,0.1295696020126343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,float16,0,0.875119972229004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,fp8,0,0.844816017150879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,2,128,1,fp8,fp8,0,0.8447711944580079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,float16,0,0.908574390411377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,fp8,0,0.8505104064941407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,4,128,1,fp8,fp8,0,0.8474335670471191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,float16,0,1.0042223930358887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,fp8,0,0.8582719802856446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,16,8,128,1,fp8,fp8,0,0.8561327934265137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,fp8,0,0.5123968124389648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,16,128,1,fp8,fp8,0,0.5120528221130372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,float16,0,0.44298081398010253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,fp8,0,0.43249120712280276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,1,128,1,fp8,fp8,0,0.4331200122833252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,float16,0,0.442033576965332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,fp8,0,0.4348559856414795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,2,128,1,fp8,fp8,0,0.4326335906982422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,float16,0,0.47386560440063474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,fp8,0,0.4344672203063965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,4,128,1,fp8,fp8,0,0.4359295845031738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,float16,0,0.5168352127075195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,fp8,0,0.4401519775390625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,float16,0,0.6004032135009766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,16,8,128,1,fp8,fp8,0,0.4383391857147217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,float16,0,0.3148367881774902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,16,128,1,fp8,fp8,0,0.2667855978012085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,float16,0,0.2326672077178955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,fp8,0,0.22652480602264405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,1,128,1,fp8,fp8,0,0.22745919227600098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,float16,0,0.23154881000518798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,fp8,0,0.22752959728240968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,float16,0,0.24523038864135743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,2,128,1,fp8,fp8,0,0.22688159942626954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,fp8,0,0.2280128002166748
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,4,128,1,fp8,fp8,0,0.22756800651550294
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,float16,0,0.26843039989471434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,fp8,0,0.23131198883056642
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,float16,0,0.1670591950416565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,fp8,0,0.14459680318832396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,16,128,1,fp8,fp8,0,0.14409600496292113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,float16,0,0.1268720030784607
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,fp8,0,0.12318400144577027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,1,128,1,fp8,fp8,0,0.12308319807052612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,float16,0,0.12504639625549316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,fp8,0,0.12325439453125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,2,128,1,fp8,fp8,0,0.12312639951705932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,float16,0,0.13477920293807982
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,fp8,0,0.12334239482879639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,4,128,1,fp8,fp8,0,0.12391040325164795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,float16,0,0.1466223955154419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,8,128,1,fp8,fp8,0,0.12519840002059937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,fp8,0,0.2666208028793335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,float16,0,0.09493119716644287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,fp8,0,0.0821183979511261
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,16,128,1,fp8,fp8,0,0.08216320276260376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,float16,0,0.07200319766998291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,fp8,0,0.07013760209083557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,1,128,1,fp8,fp8,0,0.06990560293197631
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,float16,0,0.07189440131187438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,fp8,0,0.07016639709472657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,2,128,1,fp8,fp8,0,0.07016800045967102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,float16,0,0.07445120215415954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,fp8,0,0.06993759870529175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,4,128,1,fp8,fp8,0,0.07044000029563904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,float16,0,0.08246240019798279
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,8,128,1,fp8,fp8,0,0.07141280174255371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,16,8,128,1,fp8,fp8,0,0.2297215938568115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,fp8,0,0.04959680140018463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,16,128,1,fp8,fp8,0,0.04968000054359436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,float16,0,0.04725759923458099
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,fp8,0,0.04533439874649048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,1,128,1,fp8,fp8,0,0.04524640142917633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,float16,0,0.04728319942951202
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,fp8,0,0.04533439874649048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,2,128,1,fp8,fp8,0,0.04546720087528229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,float16,0,0.04800159931182861
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,fp8,0,0.12549599409103393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,fp8,0,0.04531359970569611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,4,128,1,fp8,fp8,0,0.045902401208877563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,float16,0,0.04945760071277618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,fp8,0,0.045256000757217404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,8,128,1,fp8,fp8,0,0.045332801342010495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,float16,0,0.033025598526000975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,fp8,0,0.030990400910377504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,16,128,1,fp8,fp8,0,0.03238880038261414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,float16,0,0.03091840147972107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,fp8,0,0.02988480031490326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,1,128,1,fp8,fp8,0,0.028892800211906433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,float16,0,0.03102239966392517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,fp8,0,0.02889760136604309
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,2,128,1,fp8,fp8,0,0.030723199248313904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,float16,0,0.030904000997543334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,fp8,0,0.029652801156044007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,4,128,1,fp8,fp8,0,0.028934401273727418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,float16,0,0.03284479975700379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,fp8,0,0.02890079915523529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,16,8,128,1,fp8,fp8,0,0.029529601335525513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,fp8,0,0.07015680074691773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,float16,0,0.0547760009765625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,float16,0,0.5401120185852051
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,fp8,0,0.5527344226837159
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,1,128,1,fp8,fp8,0,0.5515439987182618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,float16,0,0.5433328151702881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,fp8,0,0.553545618057251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,2,128,1,fp8,fp8,0,0.5548799991607666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,float16,0,0.5795551776885987
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,fp8,0,0.5546832084655762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,4,128,1,fp8,fp8,0,0.5562143802642823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,float16,0,0.6489568233489991
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,fp8,0,0.5604464054107666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,16,8,128,1,fp8,fp8,0,0.562388801574707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,float16,0,0.39700319766998293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,fp8,0,0.34363839626312254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,16,128,1,fp8,fp8,0,0.344484806060791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,float16,0,0.2771008014678955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,fp8,0,0.2849440097808838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,1,128,1,fp8,fp8,0,0.2851727962493896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,float16,0,0.28114559650421145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,fp8,0,0.2852479934692383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,2,128,1,fp8,fp8,0,0.2862864017486572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,float16,0,0.30058879852294923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,fp8,0,0.2870192050933838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,4,128,1,fp8,fp8,0,0.2871999979019165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,float16,0,0.3354464054107666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,fp8,0,0.28932480812072753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,16,8,128,1,fp8,fp8,0,0.2904848098754883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,float16,0,0.2090127944946289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,fp8,0,0.18081120252609253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,fp8,0,0.15054559707641602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,1,128,1,fp8,fp8,0,0.15092320442199708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,float16,0,0.14955519437789916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,fp8,0,0.15173280239105225
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,2,128,1,fp8,fp8,0,0.15173920392990112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,float16,0,0.15990079641342164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,fp8,0,0.15178560018539428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,4,128,1,fp8,fp8,0,0.1521648049354553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,float16,0,0.17742400169372557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,8,128,1,fp8,fp8,0,0.1541599988937378
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,fp8,0,0.15414559841156006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,float16,0,0.1130784034729004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,fp8,0,0.10003520250320434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,16,128,1,fp8,fp8,0,0.10007359981536865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,float16,0,0.08294240236282349
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,fp8,0,0.08254879713058472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,1,128,1,fp8,fp8,0,0.08282560110092163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,16,128,1,fp8,fp8,0,0.18250240087509156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,float16,0,0.08416960239410401
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,fp8,0,0.0835536003112793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,2,128,1,fp8,fp8,0,0.0840287983417511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,fp8,0,0.08392639756202698
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,4,128,1,fp8,fp8,0,0.08407359719276428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,float16,0,0.0990880012512207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,fp8,0,0.08563680052757264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,8,128,1,fp8,fp8,0,0.08441920280456543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,float16,0,0.06679999828338623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,fp8,0,0.0574720025062561
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,16,128,1,fp8,fp8,0,0.05749760270118713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,float16,0,0.0502623975276947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,fp8,0,0.05132799744606018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,1,128,1,fp8,fp8,0,0.04934560060501099
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,float16,0,0.051395201683044435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,fp8,0,0.04951040148735046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,float16,0,0.051523202657699586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,fp8,0,0.049721598625183105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,4,128,1,fp8,fp8,0,0.05015680193901062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,float16,0,0.05733280181884766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,fp8,0,0.0499424010515213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,8,128,1,fp8,fp8,0,0.049449598789215087
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,float16,0,0.03909280002117157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,fp8,0,0.03695360124111176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,16,128,1,fp8,fp8,0,0.03701440095901489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,float16,0,0.03513599932193756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,fp8,0,0.03302879929542542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,1,128,1,fp8,fp8,0,0.03303520083427429
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,float16,0,0.14768160581588746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,float16,0,0.034999999403953555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,fp8,0,0.03309600055217743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,float16,0,0.08843680024147034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,2,128,1,fp8,fp8,0,0.03314239978790283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,float16,0,0.03500959873199463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,fp8,0,0.03323040008544922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,4,128,1,fp8,fp8,0,0.03297599852085113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,fp8,0,0.03314560055732727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,8,128,1,fp8,fp8,0,0.03315199911594391
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,float16,0,0.02685439884662628
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,fp8,0,0.026740801334381104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,16,128,1,fp8,fp8,0,0.026771199703216553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,float16,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,fp8,0,0.024740800261497498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,1,128,1,fp8,fp8,0,0.02470560073852539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,float16,0,0.02487040013074875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,fp8,0,0.024710400402545928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,2,128,1,fp8,fp8,0,0.024718399345874786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,float16,0,0.024766400456428528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,fp8,0,0.024851199984550477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,16,2,128,1,fp8,fp8,0,0.05092960000038147
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,4,128,1,fp8,fp8,0,0.024724799394607543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,float16,0,0.026732799410820008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,fp8,0,0.024774399399757386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,16,8,128,1,fp8,fp8,0,0.024700799584388734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,float16,0,0.571943998336792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,fp8,0,0.6162655830383301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,1,128,1,fp8,fp8,0,0.6147744178771972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,float16,0,0.582476806640625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,fp8,0,0.6176735877990722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,2,128,1,fp8,fp8,0,0.6159264087677002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,float16,0,0.6354047775268554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,fp8,0,0.6185215950012207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,float16,0,0.03707360029220581
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,4,128,1,fp8,fp8,0,0.6195007801055908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,fp8,0,0.6246335983276368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,float16,0,0.44685921669006345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,8,128,1,fp8,fp8,0,0.626361608505249
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,fp8,0,0.3897279977798462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,16,128,1,fp8,fp8,0,0.39134399890899657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,float16,0,0.29471840858459475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,fp8,0,0.31562879085540774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,1,128,1,fp8,fp8,0,0.31502718925476075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,float16,0,0.3031984090805054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,fp8,0,0.3158128023147583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,2,128,1,fp8,fp8,0,0.3166512012481689
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,float16,0,0.32284319400787354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,fp8,0,0.317739200592041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,4,128,1,fp8,fp8,0,0.3170639991760254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,float16,0,0.3688159942626953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,float16,0,0.7175871849060058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,fp8,0,0.3199759960174561
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,16,8,128,1,fp8,fp8,0,0.32125279903411863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,float16,0,0.2330672025680542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,fp8,0,0.20294880867004395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,float16,0,0.15678720474243163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,16,128,1,fp8,fp8,0,0.20327680110931395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,fp8,0,0.1646000027656555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,1,128,1,fp8,fp8,0,0.16468479633331298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,fp8,0,0.1653264045715332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,2,128,1,fp8,fp8,0,0.16513919830322266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,float16,0,0.17012799978256227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,fp8,0,0.16612000465393068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,4,128,1,fp8,fp8,0,0.16611520051956177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,float16,0,0.19091039896011353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,fp8,0,0.16812000274658204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,8,128,1,fp8,fp8,0,0.1682160019874573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,float16,0,0.12403520345687866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,16,128,1,fp8,fp8,0,0.10970720052719116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,fp8,0,0.08834400177001953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,1,128,1,fp8,fp8,0,0.08869439959526063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,float16,0,0.08691359758377075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,fp8,0,0.09028480052947999
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,2,128,1,fp8,fp8,0,0.08989440202713013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,float16,0,0.09401599764823913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,fp8,0,0.09030399918556213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,4,128,1,fp8,fp8,0,0.08998079895973206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,float16,0,0.10441440343856812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,fp8,0,0.09108480215072631
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,8,128,1,fp8,fp8,0,0.09115039706230163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,float16,0,0.15969280004501343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,float16,0,0.06976159811019897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,fp8,0,0.06197440028190613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,16,128,1,fp8,fp8,0,0.06161440014839172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,fp8,0,0.051318401098251344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,1,128,1,fp8,fp8,0,0.05138880014419556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,float16,0,0.05014240145683289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,fp8,0,0.10966399908065796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,fp8,0,0.05140799880027771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,2,128,1,fp8,fp8,0,0.05135679841041565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,float16,0,0.0534496009349823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,fp8,0,0.051291197538375854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,4,128,1,fp8,fp8,0,0.051420801877975465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,float16,0,0.05958719849586487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,fp8,0,0.05145279765129089
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,8,128,1,fp8,fp8,0,0.0514352023601532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,float16,0,0.03937279880046844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,fp8,0,0.03702239990234375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,16,128,1,fp8,fp8,0,0.03711999952793121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,fp8,0,0.03293919861316681
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,1,128,1,fp8,fp8,0,0.03303520083427429
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,float16,0,0.03296000063419342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,fp8,0,0.03296160101890564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,2,128,1,fp8,fp8,0,0.0329584002494812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,float16,0,0.03414719998836517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,fp8,0,0.03294720053672791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,4,128,1,fp8,fp8,0,0.03293760120868683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,float16,0,0.036371201276779175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,fp8,0,0.03303839862346649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,8,128,1,fp8,fp8,0,0.03307519853115082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,float16,0,0.023915199935436247
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,float16,0,0.05006399750709534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,fp8,0,0.02271360009908676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,16,128,1,fp8,fp8,0,0.02473759949207306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,float16,0,0.020735999941825865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,fp8,0,0.022150400280952453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,1,128,1,fp8,fp8,0,0.020691199600696562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,float16,0,0.021436800062656403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,fp8,0,0.020710399746894835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,2,128,1,fp8,fp8,0,0.02083680033683777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,float16,0,0.02210720032453537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,float16,0,0.08619199991226197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,4,128,1,fp8,fp8,0,0.020734399557113647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,float16,0,0.022819200158119203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,fp8,0,0.020681600272655486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,8,128,1,fp8,fp8,0,0.022137600183486938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,float16,0,0.020729599893093108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,fp8,0,0.020828799903392793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,16,128,1,fp8,fp8,0,0.020652799308300017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,float16,0,0.020552000403404234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,fp8,0,0.020532800257205962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,float16,0,0.03300639986991882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,1,128,1,fp8,fp8,0,0.02070239931344986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,float16,0,0.020601600408554077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,fp8,0,0.02062239944934845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,2,128,1,fp8,fp8,0,0.019489599764347075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,float16,0,0.020718400180339814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,fp8,0,0.020392000675201416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,4,128,1,fp8,fp8,0,0.020529599487781526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,fp8,0,0.019926400482654573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,8,128,1,fp8,fp8,0,0.019729599356651306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,1,128,1,float16,float16,0,0.4431424140930176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,1,128,1,float16,fp8,0,0.49724478721618653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,fp8,0,0.02086080014705658
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,1,128,1,fp8,fp8,0,0.4974368095397949
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,2,128,1,float16,float16,0,0.4510000228881836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,2,128,1,float16,fp8,0,0.4977839946746826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,2,128,1,fp8,fp8,0,0.49866561889648436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,4,128,1,float16,float16,0,0.4898367881774902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,float16,0,0.02066880017518997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,4,128,1,float16,fp8,0,0.5000192165374756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,4,128,1,fp8,fp8,0,0.4985360145568848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,8,128,1,float16,float16,0,0.5692848205566406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,8,128,1,float16,fp8,0,0.502235221862793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,16,128,1,float16,float16,0,0.3711679935455322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,16,8,128,1,fp8,fp8,0,0.500486421585083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,1,128,1,float16,float16,0,0.22962720394134523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,16,128,1,fp8,fp8,0,0.32544960975646975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,1,128,1,float16,fp8,0,0.2545759916305542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,1,128,1,fp8,fp8,0,0.25482079982757566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,2,128,1,float16,float16,0,0.2320080041885376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,2,128,1,float16,fp8,0,0.2548432111740112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,2,128,1,fp8,fp8,0,0.2556303977966309
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,4,128,1,float16,float16,0,0.25288000106811526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,4,128,1,fp8,fp8,0,0.2561392068862915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,8,128,1,float16,float16,0,0.2923935890197754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,8,128,1,float16,fp8,0,0.25724000930786134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,8,128,1,fp8,fp8,0,0.2573632001876831
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,16,128,1,float16,float16,0,0.192467200756073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,16,128,1,float16,fp8,0,0.16956640481948854
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,16,128,1,fp8,fp8,0,0.17008320093154908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,1,128,1,float16,float16,0,0.12264319658279418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,1,128,1,float16,fp8,0,0.13328640460968016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,2,128,1,float16,float16,0,0.12323999404907227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,1,128,1,fp8,fp8,0,0.13443200588226317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,2,128,1,float16,fp8,0,0.13337440490722657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,2,128,1,fp8,fp8,0,0.13345119953155518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,4,128,1,float16,float16,0,0.132804799079895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,4,128,1,float16,fp8,0,0.13344639539718628
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,4,128,1,fp8,fp8,0,0.13337279558181764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,16,128,1,float16,fp8,0,0.3263423919677734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,8,128,1,float16,fp8,0,0.13498879671096803
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,8,128,1,fp8,fp8,0,0.13542879819869996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,16,128,1,float16,float16,0,0.10296159982681274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,16,128,1,float16,fp8,0,0.09243999719619751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,16,128,1,fp8,fp8,0,0.0922864019870758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,1,128,1,float16,float16,0,0.06888160109519958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,1,128,1,float16,fp8,0,0.07200160026550292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,1,128,1,fp8,fp8,0,0.07200000286102295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,2,128,1,float16,float16,0,0.06875200271606445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,2,128,1,float16,fp8,0,0.07375839948654175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,2,128,1,fp8,fp8,0,0.07192320227622986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,16,4,128,1,float16,fp8,0,0.25553441047668457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,4,128,1,float16,float16,0,0.0742896020412445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,4,128,1,float16,fp8,0,0.07394400238990784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,4,128,1,fp8,fp8,0,0.07371360063552856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,8,128,1,float16,float16,0,0.08443520069122315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,8,128,1,float16,fp8,0,0.07391200065612794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,16,8,128,1,fp8,fp8,0,0.07396479845046997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,16,128,1,float16,float16,0,0.05816159844398498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,16,128,1,fp8,fp8,0,0.05146880149841308
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,1,128,1,float16,float16,0,0.039323198795318606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,1,128,1,float16,fp8,0,0.041300800442695615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,1,128,1,fp8,fp8,0,0.0412447988986969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,2,128,1,float16,float16,0,0.03927200138568878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,2,128,1,float16,fp8,0,0.04123679995536804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,2,128,1,fp8,fp8,0,0.041222399473190306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,4,128,1,float16,float16,0,0.04316479861736298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,4,128,1,float16,fp8,0,0.04127199947834015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,4,128,1,fp8,fp8,0,0.04118239879608154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,8,128,1,float16,float16,0,0.048614400625228885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,8,128,1,fp8,fp8,0,0.04211679995059967
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,16,128,1,float16,float16,0,0.032209599018096925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,16,128,1,float16,fp8,0,0.030849599838256837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,16,128,1,fp8,fp8,0,0.03094559907913208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,1,128,1,float16,float16,0,0.024940800666809083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,1,128,1,float16,fp8,0,0.026774400472640993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,1,128,1,fp8,fp8,0,0.026767998933792114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,2,128,1,float16,float16,0,0.02499520033597946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,2,128,1,float16,fp8,0,0.026756799221038817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,2,128,1,fp8,fp8,0,0.0269679993391037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,4,128,1,float16,float16,0,0.026800000667572023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,4,128,1,float16,fp8,0,0.026756799221038817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,4,128,1,fp8,fp8,0,0.026683199405670165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,8,128,1,float16,float16,0,0.028918400406837463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,8,128,1,float16,fp8,0,0.026819199323654175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,16,8,128,1,fp8,fp8,0,0.02686080038547516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,16,128,1,float16,fp8,0,0.05139679908752441
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,16,128,1,float16,fp8,0,0.018636800348758698
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,16,8,128,1,float16,fp8,0,0.04219360053539276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,16,128,1,fp8,fp8,0,0.018985599279403687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,1,128,1,float16,float16,0,0.016748799383640288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,1,128,1,float16,fp8,0,0.018644799292087556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,1,128,1,fp8,fp8,0,0.018510399758815764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,2,128,1,float16,float16,0,0.01780479997396469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,2,128,1,float16,fp8,0,0.018508799374103546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,16,8,128,1,float16,float16,0,0.15241600275039674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,2,128,1,fp8,fp8,0,0.017454400658607483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,4,128,1,float16,float16,0,0.016814400255680085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,4,128,1,float16,fp8,0,0.018172800540924072
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,4,128,1,fp8,fp8,0,0.016551999747753142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,8,128,1,float16,fp8,0,0.01839679926633835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,8,128,1,fp8,fp8,0,0.017636799812316896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,16,128,1,float16,float16,0,0.016568000614643096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,16,128,1,float16,fp8,0,0.016599999368190767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,16,128,1,fp8,fp8,0,0.016551999747753142
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,1,128,1,float16,float16,0,0.015033599734306336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,1,128,1,float16,fp8,0,0.01637600064277649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,1,128,1,fp8,fp8,0,0.016129599511623384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,16,128,1,float16,float16,0,0.01871519982814789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,2,128,1,float16,fp8,0,0.015056000649929046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,2,128,1,fp8,fp8,0,0.015412800014019012
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,4,128,1,float16,float16,0,0.01653279960155487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,4,128,1,float16,fp8,0,0.016564799845218657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,4,128,1,fp8,fp8,0,0.016527999937534333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,8,128,1,float16,float16,0,0.016646400094032288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,8,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,8,128,1,fp8,fp8,0,0.016471999883651733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,16,128,1,float16,float16,0,0.016553600132465363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,16,128,1,float16,fp8,0,0.016505600512027742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,16,128,1,fp8,fp8,0,0.014659200608730317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,1,128,1,float16,float16,0,0.014644800126552582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,1,128,1,float16,fp8,0,0.01459999978542328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,1,128,1,fp8,fp8,0,0.014571200311183929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,16,8,128,1,float16,float16,0,0.01867839992046356
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,2,128,1,float16,float16,0,0.014731200039386749
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,2,128,1,float16,fp8,0,0.014606399834156037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,2,128,1,fp8,fp8,0,0.01462559998035431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,4,128,1,float16,float16,0,0.016489599645137788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,4,128,1,float16,fp8,0,0.014628799259662628
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,4,128,1,fp8,fp8,0,0.014655999839305878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,8,128,1,float16,float16,0,0.015783999860286713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,8,128,1,float16,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,16,8,128,1,fp8,fp8,0,0.014627200365066529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,1,128,1,float16,float16,0,0.1896191954612732
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,1,128,1,float16,fp8,0,0.21838080883026123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,1,128,1,fp8,fp8,0,0.21802880764007568
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,2,128,1,float16,float16,0,0.1908768057823181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,16,2,128,1,float16,float16,0,0.014596800506114959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,2,128,1,float16,fp8,0,0.21875040531158446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,2,128,1,fp8,fp8,0,0.2183824062347412
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,4,128,1,float16,fp8,0,0.21767680644989013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,4,128,1,fp8,fp8,0,0.21871678829193114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,8,128,1,float16,float16,0,0.2485136032104492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,8,128,1,float16,fp8,0,0.21819839477539063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,16,128,1,float16,float16,0,0.1732800006866455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,16,128,1,float16,fp8,0,0.14916800260543822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,16,128,1,fp8,fp8,0,0.14967360496520996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,1,128,1,float16,float16,0,0.10250719785690307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,1,128,1,fp8,fp8,0,0.11505279541015626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,2,128,1,float16,float16,0,0.10283039808273316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,2,128,1,float16,fp8,0,0.11516319513320923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,2,128,1,fp8,fp8,0,0.11518559455871583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,4,128,1,float16,float16,0,0.11258879899978638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,4,128,1,float16,fp8,0,0.11556960344314575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,4,128,1,fp8,fp8,0,0.115174400806427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,8,128,1,float16,float16,0,0.131387197971344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,8,128,1,float16,fp8,0,0.1168768048286438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,8,128,1,fp8,fp8,0,0.11620320081710815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,4,128,1,float16,float16,0,0.21047840118408204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,16,128,1,float16,float16,0,0.09214720129966736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,16,128,1,float16,fp8,0,0.08002079725265503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,16,128,1,fp8,fp8,0,0.08001919984817504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,1,128,1,float16,float16,0,0.05676800012588501
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,1,128,1,float16,fp8,0,0.06159359812736511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,16,8,128,1,fp8,fp8,0,0.21876800060272217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,2,128,1,float16,float16,0,0.05752800107002258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,2,128,1,float16,fp8,0,0.06206240057945252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,2,128,1,fp8,fp8,0,0.06175360083580017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,4,128,1,float16,float16,0,0.06161919832229614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,4,128,1,float16,fp8,0,0.0625760018825531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,4,128,1,fp8,fp8,0,0.06190400123596192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,8,128,1,float16,float16,0,0.07201120257377625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,8,128,1,float16,fp8,0,0.0634992003440857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,8,128,1,fp8,fp8,0,0.06363040208816528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,16,128,1,float16,float16,0,0.05247039794921875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,16,128,1,float16,fp8,0,0.04694559872150421
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,16,128,1,fp8,fp8,0,0.04722239971160889
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,1,128,1,float16,float16,0,0.03297599852085113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,1,128,1,float16,fp8,0,0.03699359893798828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,1,128,1,fp8,fp8,0,0.03697920143604279
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,2,128,1,float16,float16,0,0.03324800133705139
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,2,128,1,float16,fp8,0,0.03686400055885315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,4,128,1,float16,float16,0,0.03707520067691803
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,4,128,1,float16,fp8,0,0.0369488000869751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,4,128,1,fp8,fp8,0,0.03703519999980927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,8,128,1,float16,float16,0,0.041526401042938234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,8,128,1,float16,fp8,0,0.03698720037937164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,8,128,1,fp8,fp8,0,0.03703039884567261
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,16,1,128,1,fp8,fp8,0,0.06160159707069397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,16,128,1,float16,float16,0,0.02691200077533722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,16,128,1,float16,fp8,0,0.026790401339530943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,16,128,1,fp8,fp8,0,0.026774400472640993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,1,128,1,float16,float16,0,0.020713600516319274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,16,1,128,1,float16,fp8,0,0.11623200178146362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,1,128,1,float16,fp8,0,0.02269279956817627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,1,128,1,fp8,fp8,0,0.022700800001621245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,2,128,1,float16,float16,0,0.0208639994263649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,2,128,1,float16,fp8,0,0.022745600342750548
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,4,128,1,float16,float16,0,0.021531200408935545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,4,128,1,float16,fp8,0,0.02271520048379898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,16,2,128,1,fp8,fp8,0,0.03700799942016601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,4,128,1,fp8,fp8,0,0.022729599475860597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,8,128,1,float16,float16,0,0.02476319968700409
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,8,128,1,float16,fp8,0,0.022703999280929567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,8,128,1,fp8,fp8,0,0.022819200158119203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,16,128,1,float16,float16,0,0.01720000058412552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,16,128,1,float16,fp8,0,0.016686399281024934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,16,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,1,128,1,float16,float16,0,0.014552000164985656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,1,128,1,fp8,fp8,0,0.014660799503326416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,2,128,1,float16,float16,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,2,128,1,float16,fp8,0,0.014608000218868256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,2,128,1,fp8,fp8,0,0.0146479994058609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,4,128,1,float16,float16,0,0.014665600657463074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,4,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,4,128,1,fp8,fp8,0,0.01645279973745346
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,8,128,1,float16,float16,0,0.016572800278663636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,8,128,1,float16,fp8,0,0.0165120005607605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,8,128,1,fp8,fp8,0,0.016531200706958772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,16,128,1,float16,float16,0,0.014627200365066529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,16,128,1,float16,fp8,0,0.014567999541759491
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,16,128,1,fp8,fp8,0,0.014696000516414643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,1,128,1,float16,float16,0,0.014460800588130951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,1,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,1,128,1,fp8,fp8,0,0.014507199823856353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,2,128,1,float16,float16,0,0.01279519945383072
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,2,128,1,float16,fp8,0,0.014420799911022186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,2,128,1,fp8,fp8,0,0.014535999298095703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,4,128,1,float16,float16,0,0.014548799395561219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,4,128,1,float16,fp8,0,0.014446400105953217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,4,128,1,fp8,fp8,0,0.014425599575042724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,8,128,1,float16,float16,0,0.014521600306034088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,8,128,1,fp8,fp8,0,0.01459999978542328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,16,1,128,1,float16,fp8,0,0.014720000326633453
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,16,128,1,float16,float16,0,0.014584000408649444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,16,128,1,float16,fp8,0,0.01255200058221817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,1,128,1,float16,float16,0,0.012558400630950928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,16,128,1,fp8,fp8,0,0.012681600451469422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,1,128,1,float16,fp8,0,0.012462399899959564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,1,128,1,fp8,fp8,0,0.012572799623012543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,2,128,1,float16,float16,0,0.012547199428081513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,2,128,1,float16,fp8,0,0.012651200592517852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,2,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,4,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,4,128,1,fp8,fp8,0,0.012476799637079239
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,8,128,1,float16,float16,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,8,128,1,float16,fp8,0,0.01244800016283989
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,8,128,1,fp8,fp8,0,0.01244800016283989
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,16,128,1,float16,float16,0,0.012518399953842163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,16,128,1,float16,fp8,0,0.01247519999742508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,16,128,1,fp8,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,1,128,1,float16,float16,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,1,128,1,float16,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,1,128,1,fp8,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,2,128,1,float16,float16,0,0.01247519999742508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,2,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,2,128,1,fp8,fp8,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,4,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,4,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,4,128,1,fp8,fp8,0,0.012507200241088867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,16,8,128,1,float16,fp8,0,0.014472000300884247
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,8,128,1,float16,fp8,0,0.01257600039243698
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,8,128,1,fp8,fp8,0,0.012580800056457519
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,1,128,1,float16,float16,0,0.11911360025405884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,1,128,1,float16,fp8,0,0.13371200561523439
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,1,128,1,fp8,fp8,0,0.13406239748001098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,16,4,128,1,float16,float16,0,0.013488000631332398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,2,128,1,float16,float16,0,0.11966079473495483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,2,128,1,float16,fp8,0,0.13428800106048583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,2,128,1,fp8,fp8,0,0.1337280035018921
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,4,128,1,float16,float16,0,0.12796159982681274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,4,128,1,float16,fp8,0,0.13496479988098145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,4,128,1,fp8,fp8,0,0.1347040057182312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,8,128,1,float16,float16,0,0.14615679979324342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,8,128,1,float16,fp8,0,0.13489919900894165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,16,8,128,1,fp8,fp8,0,0.13541920185089112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,16,128,1,float16,float16,0,0.09658719897270203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,16,128,1,float16,fp8,0,0.09040799736976624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,16,128,1,fp8,fp8,0,0.08939200043678283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,16,2,128,1,fp8,fp8,0,0.022697600722312927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,1,128,1,float16,float16,0,0.06582239866256714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,1,128,1,float16,fp8,0,0.07184799909591674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,1,128,1,fp8,fp8,0,0.07196639776229859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,2,128,1,float16,fp8,0,0.07192320227622986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,2,128,1,fp8,fp8,0,0.0723743975162506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,4,128,1,float16,float16,0,0.07011520266532897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,4,128,1,float16,fp8,0,0.07381600141525269
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,4,128,1,fp8,fp8,0,0.07192000150680541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,8,128,1,float16,float16,0,0.0794272005558014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,16,8,128,1,float16,float16,0,0.012457600235939026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,8,128,1,float16,fp8,0,0.07391200065612794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,8,128,1,fp8,fp8,0,0.07257919907569885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,16,128,1,float16,float16,0,0.052292799949645995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,16,128,1,float16,fp8,0,0.04948799908161163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,16,128,1,fp8,fp8,0,0.04933600127696991
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,1,128,1,float16,float16,0,0.03522399961948395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,1,128,1,float16,fp8,0,0.03925279974937439
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,1,128,1,fp8,fp8,0,0.039192000031471254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,2,128,1,float16,float16,0,0.03695200085639953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,2,128,1,fp8,fp8,0,0.03920480012893677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,2,128,1,float16,fp8,0,0.03916800022125244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,4,128,1,float16,float16,0,0.03923999965190887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,4,128,1,float16,fp8,0,0.03914400041103363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,4,128,1,fp8,fp8,0,0.03918560147285462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,8,128,1,float16,float16,0,0.04437600076198578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,8,128,1,float16,fp8,0,0.03920640051364899
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,16,8,128,1,fp8,fp8,0,0.039129599928855896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,16,128,1,float16,float16,0,0.029542401432991028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,16,128,1,float16,fp8,0,0.02881760001182556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,1,128,1,float16,float16,0,0.022734400629997254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,1,128,1,float16,fp8,0,0.024771200120449068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,1,128,1,fp8,fp8,0,0.02479359954595566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,2,128,1,float16,float16,0,0.022683200240135194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,2,128,1,float16,fp8,0,0.024758400022983552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,2,128,1,fp8,fp8,0,0.024795199930667877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,16,2,128,1,float16,float16,0,0.06589440107345582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,4,128,1,float16,fp8,0,0.024843199551105498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,4,128,1,fp8,fp8,0,0.024747200310230255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,8,128,1,float16,float16,0,0.026742398738861084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,8,128,1,float16,fp8,0,0.024716800451278685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,8,128,1,fp8,fp8,0,0.024779200553894043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,16,128,1,float16,float16,0,0.01717280000448227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,16,128,1,float16,fp8,0,0.01847680062055588
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,16,128,1,fp8,fp8,0,0.018595199286937713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,1,128,1,float16,float16,0,0.01470080018043518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,1,128,1,float16,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,1,128,1,fp8,fp8,0,0.01648319959640503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,2,128,1,float16,float16,0,0.01646080017089844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,2,128,1,float16,fp8,0,0.01652960032224655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,2,128,1,fp8,fp8,0,0.016497600078582763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,4,128,1,float16,float16,0,0.01647839993238449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,4,128,1,float16,fp8,0,0.016571199893951415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,16,128,1,fp8,fp8,0,0.02897599935531616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,4,128,1,fp8,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,8,128,1,float16,float16,0,0.016728000342845918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,8,128,1,float16,fp8,0,0.016683200001716615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,16,128,1,float16,float16,0,0.012577599287033081
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,16,128,1,float16,fp8,0,0.012438400089740754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,16,128,1,fp8,fp8,0,0.01252480000257492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,1,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,1,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,1,128,1,fp8,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,2,128,1,float16,float16,0,0.012388800084590913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,2,128,1,float16,fp8,0,0.01244639977812767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,2,128,1,fp8,fp8,0,0.012379200011491776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,4,128,1,float16,float16,0,0.012428800016641617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,16,4,128,1,float16,float16,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,4,128,1,float16,fp8,0,0.012452799826860428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,4,128,1,fp8,fp8,0,0.012415999919176102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,8,128,1,float16,float16,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,8,128,1,float16,fp8,0,0.01239679977297783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,16,8,128,1,fp8,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,16,128,1,float16,float16,0,0.011707200109958649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,16,128,1,float16,fp8,0,0.01117279976606369
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,16,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,1,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,1,128,1,float16,fp8,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,1,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,2,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,2,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,4,128,1,float16,fp8,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,4,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,4,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,8,128,1,float16,float16,0,0.01186719983816147
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,8,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,16,8,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,16,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,16,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,16,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,1,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,1,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,1,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,2,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,2,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,4,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,4,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,4,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,8,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,8,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,8,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,16,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,16,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,16,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,1,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,1,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,2,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,2,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,16,8,128,1,fp8,fp8,0,0.01711679995059967
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,2,128,1,fp8,fp8,0,0.010345599800348281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,4,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,4,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,4,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,8,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,8,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,8,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,1,128,1,float16,float16,0,0.09891679883003235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,1,128,1,float16,fp8,0,0.10267039537429809
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,1,128,1,fp8,fp8,0,0.10262559652328491
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,2,128,1,float16,float16,0,0.09874879717826843
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,2,128,1,float16,fp8,0,0.10264960527420045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,16,2,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,2,128,1,fp8,fp8,0,0.10265439748764038
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,4,128,1,float16,float16,0,0.10471680164337158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,4,128,1,float16,fp8,0,0.10273920297622681
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,4,128,1,fp8,fp8,0,0.10266879796981812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,8,128,1,float16,float16,0,0.11175520420074463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,16,1,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,8,128,1,float16,fp8,0,0.10308480262756348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,16,8,128,1,fp8,fp8,0,0.10264960527420045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,16,128,1,float16,fp8,0,0.06562719941139221
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,16,128,1,fp8,fp8,0,0.0656544029712677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,1,128,1,float16,float16,0,0.05340480208396912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,1,128,1,float16,fp8,0,0.055491197109222415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,1,128,1,fp8,fp8,0,0.05553119778633118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,2,128,1,float16,float16,0,0.05353599786758423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,2,128,1,float16,fp8,0,0.05546879768371582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,2,128,1,fp8,fp8,0,0.05555679798126221
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,4,128,1,float16,float16,0,0.05759360194206238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,4,128,1,float16,fp8,0,0.05557119846343994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,4,128,1,fp8,fp8,0,0.055478399991989134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,8,128,1,float16,float16,0,0.061673599481582644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,8,128,1,float16,fp8,0,0.05567839741706848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,16,128,1,float16,float16,0,0.037136000394821164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,16,128,1,float16,fp8,0,0.0350959986448288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,16,128,1,fp8,fp8,0,0.035129600763320924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,1,128,1,float16,float16,0,0.03135679960250855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,1,128,1,float16,fp8,0,0.030935999751091004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,1,128,1,fp8,fp8,0,0.031020799279212953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,2,128,1,float16,float16,0,0.03091199994087219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,2,128,1,float16,fp8,0,0.031067198514938353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,2,128,1,fp8,fp8,0,0.031112000346183777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,4,128,1,float16,float16,0,0.03195039927959442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,4,128,1,float16,fp8,0,0.03097440004348755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,4,128,1,fp8,fp8,0,0.03094559907913208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,8,128,1,float16,float16,0,0.03322719931602478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,8,128,1,float16,fp8,0,0.030888000130653383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,16,8,128,1,fp8,fp8,0,0.031020799279212953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,16,128,1,float16,float16,0,0.02264000028371811
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,16,128,1,float16,fp8,0,0.022679999470710754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,16,128,1,fp8,fp8,0,0.022673599421977997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,1,128,1,float16,float16,0,0.020721599459648132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,1,128,1,float16,fp8,0,0.020681600272655486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,1,128,1,fp8,fp8,0,0.020678399503231047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,2,128,1,float16,float16,0,0.02069759964942932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,2,128,1,float16,fp8,0,0.02061759978532791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,2,128,1,fp8,fp8,0,0.020662400126457214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,4,128,1,float16,float16,0,0.020703999698162077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,4,128,1,float16,fp8,0,0.02064799964427948
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,4,128,1,fp8,fp8,0,0.02059199959039688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,8,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,8,128,1,float16,fp8,0,0.02062560021877289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,16,8,128,1,fp8,fp8,0,0.02062080055475235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,8,128,1,fp8,fp8,0,0.05567520260810852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,16,128,1,float16,float16,0,0.014603200554847717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,16,128,1,float16,fp8,0,0.014528000354766845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,16,128,1,fp8,fp8,0,0.014641599357128143
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,1,128,1,float16,float16,0,0.014443199336528777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,1,128,1,fp8,fp8,0,0.014507199823856353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,2,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,2,128,1,float16,fp8,0,0.014500799775123595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,2,128,1,fp8,fp8,0,0.014534400403499603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,4,128,1,float16,fp8,0,0.014452800154685974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,4,128,1,fp8,fp8,0,0.014569599926471711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,8,128,1,float16,float16,0,0.014638400077819825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,8,128,1,float16,fp8,0,0.014414399862289429
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,8,128,1,fp8,fp8,0,0.014505599439144135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,16,128,1,float16,float16,0,0.012596799433231354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,16,128,1,float16,fp8,0,0.012390399724245072
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,16,128,1,fp8,fp8,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,1,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,1,128,1,float16,fp8,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,1,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,2,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,16,16,128,1,float16,float16,0,0.06781280040740967
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,2,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,2,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,4,128,1,float16,float16,0,0.0106175996363163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,4,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,4,128,1,fp8,fp8,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,8,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,8,128,1,float16,fp8,0,0.010527999699115753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,16,8,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,16,128,1,float16,float16,0,0.010552000254392624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,16,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,16,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,1,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,1,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,1,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,2,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,2,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,2,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,1,128,1,float16,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,4,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,4,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,4,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,16,4,128,1,float16,float16,0,0.014596800506114959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,8,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,8,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,16,8,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,16,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,16,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,16,128,1,fp8,fp8,0,0.010334400087594986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,1,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,1,128,1,fp8,fp8,0,0.010351999849081039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,2,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,2,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,2,128,1,fp8,fp8,0,0.010214400291442872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,4,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,4,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,4,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,8,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,8,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,8,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,16,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,16,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,16,128,1,fp8,fp8,0,0.009944000095129014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,1,128,1,float16,fp8,0,0.00859839990735054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,1,128,1,fp8,fp8,0,0.008505599945783615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,2,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,2,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,2,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,4,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,4,128,1,float16,fp8,0,0.009454400092363358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,4,128,1,fp8,fp8,0,0.009380800276994705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,8,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,8,128,1,float16,fp8,0,0.009312000125646591
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,8,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,1,128,1,float16,float16,0,0.08453919887542724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,1,128,1,float16,fp8,0,0.08417119979858398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,1,128,1,fp8,fp8,0,0.08422399759292602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,16,1,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,2,128,1,float16,float16,0,0.08618720173835755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,2,128,1,float16,fp8,0,0.08415200114250183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,2,128,1,fp8,fp8,0,0.08414239883422851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,4,128,1,float16,float16,0,0.08831359744071961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,4,128,1,float16,fp8,0,0.08423839807510376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,4,128,1,fp8,fp8,0,0.08416000008583069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,8,128,1,float16,float16,0,0.09251840114593506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,8,128,1,float16,fp8,0,0.08557760119438171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,16,8,128,1,fp8,fp8,0,0.0841376006603241
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,16,128,1,float16,float16,0,0.05345439910888672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,16,1,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,16,128,1,float16,fp8,0,0.05106880068778992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,16,128,1,fp8,fp8,0,0.05139679908752441
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,1,128,1,float16,float16,0,0.04725759923458099
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,1,128,1,float16,fp8,0,0.047177600860595706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,1,128,1,fp8,fp8,0,0.04733920097351074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,2,128,1,float16,float16,0,0.047276800870895384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,2,128,1,float16,fp8,0,0.047367998957633974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,2,128,1,fp8,fp8,0,0.04729920029640198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,4,128,1,float16,float16,0,0.04737119972705841
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,4,128,1,float16,fp8,0,0.04725280106067657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,4,128,1,fp8,fp8,0,0.04739519953727722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,8,128,1,float16,float16,0,0.049439999461174014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,8,128,1,float16,fp8,0,0.04734080135822296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,16,8,128,1,fp8,fp8,0,0.04732640087604523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,16,128,1,float16,float16,0,0.028870400786399842
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,16,128,1,float16,fp8,0,0.028969600796699524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,16,128,1,fp8,fp8,0,0.028969600796699524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,1,128,1,float16,float16,0,0.028830400109291075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,1,128,1,fp8,fp8,0,0.026932799816131593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,1,128,1,float16,fp8,0,0.026867198944091796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,2,128,1,float16,fp8,0,0.02680320143699646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,2,128,1,fp8,fp8,0,0.026931199431419372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,4,128,1,float16,fp8,0,0.026876801252365114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,4,128,1,fp8,fp8,0,0.02693440020084381
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,8,128,1,float16,float16,0,0.02884800136089325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,8,128,1,float16,fp8,0,0.027062401175498962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,8,128,1,fp8,fp8,0,0.02686080038547516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,16,128,1,float16,float16,0,0.020691199600696562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,16,128,1,float16,fp8,0,0.01868479996919632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,16,128,1,fp8,fp8,0,0.020057600736618043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,1,128,1,float16,float16,0,0.018590399622917177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,1,128,1,float16,fp8,0,0.0186831995844841
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,2,128,1,float16,float16,0,0.01879040002822876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,2,128,1,float16,fp8,0,0.018492799997329713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,2,128,1,fp8,fp8,0,0.018671999871730804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,4,128,1,float16,float16,0,0.018905599415302277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,4,128,1,float16,fp8,0,0.01873279958963394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,4,128,1,fp8,fp8,0,0.018676799535751343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,8,128,1,float16,float16,0,0.02051360011100769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,8,128,1,float16,fp8,0,0.01860000044107437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,8,128,1,fp8,fp8,0,0.01865279972553253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,16,128,1,float16,float16,0,0.014595200121402741
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,16,128,1,float16,fp8,0,0.014316800236701965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,16,128,1,fp8,fp8,0,0.014508800208568573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,1,128,1,float16,float16,0,0.01249919980764389
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,2,128,1,float16,float16,0,0.028673601150512696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,16,4,128,1,float16,float16,0,0.02881920039653778
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,1,128,1,float16,fp8,0,0.012641599774360657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,1,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,2,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,2,128,1,fp8,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,4,128,1,float16,float16,0,0.013704000413417817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,4,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,16,1,128,1,fp8,fp8,0,0.018515199422836304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,4,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,8,128,1,float16,float16,0,0.014033600687980652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,8,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,8,128,1,fp8,fp8,0,0.01244800016283989
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,16,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,16,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,1,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,1,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,1,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,2,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,2,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,2,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,4,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,4,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,4,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,8,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,8,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,8,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,16,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,16,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,16,2,128,1,float16,float16,0,0.01449279934167862
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,16,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,1,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,1,128,1,float16,fp8,0,0.009691199660301209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,1,128,1,fp8,fp8,0,0.01029599979519844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,2,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,2,128,1,fp8,fp8,0,0.009326399862766266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,4,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,4,128,1,float16,fp8,0,0.009487999975681305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,4,128,1,fp8,fp8,0,0.010345599800348281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,8,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,8,128,1,float16,fp8,0,0.0103472001850605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,16,8,128,1,fp8,fp8,0,0.00968960002064705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,16,16,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,16,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,16,128,1,fp8,fp8,0,0.00873119980096817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,1,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,1,128,1,float16,fp8,0,0.008404800295829773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,1,128,1,fp8,fp8,0,0.009995199739933014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,2,128,1,float16,float16,0,0.010344000160694122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,2,128,1,float16,fp8,0,0.010283199697732925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,2,128,1,fp8,fp8,0,0.008396799862384795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,4,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,4,128,1,float16,fp8,0,0.008484800159931184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,4,128,1,fp8,fp8,0,0.010310400277376175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,8,128,1,float16,float16,0,0.009848000109195709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,8,128,1,fp8,fp8,0,0.00846560001373291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,16,128,1,float16,float16,0,0.010318399965763092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,16,128,1,float16,fp8,0,0.009673599898815156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,16,128,1,fp8,fp8,0,0.009497600048780442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,1,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,1,128,1,float16,fp8,0,0.008489599823951722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,1,128,1,fp8,fp8,0,0.009249600023031235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,2,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,2,128,1,float16,fp8,0,0.009275200217962265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,2,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,4,128,1,float16,float16,0,0.010355199873447418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,4,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,4,128,1,fp8,fp8,0,0.009353599697351455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,8,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,8,128,1,float16,fp8,0,0.009603200107812881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,16,8,128,1,fp8,fp8,0,0.00921280011534691
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,1,128,1,float16,float16,0,0.0802784025669098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,1,128,1,float16,fp8,0,0.0780623972415924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,1,128,1,fp8,fp8,0,0.07800319790840149
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,2,128,1,float16,float16,0,0.08031200170516968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,2,128,1,float16,fp8,0,0.07801759839057923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,2,128,1,fp8,fp8,0,0.0781279981136322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,4,128,1,float16,float16,0,0.08212640285491943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,4,128,1,float16,fp8,0,0.07811200022697448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,4,128,1,fp8,fp8,0,0.07809759974479676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,8,128,1,float16,float16,0,0.08423839807510376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,8,128,1,float16,fp8,0,0.0780672013759613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,16,8,128,1,fp8,fp8,0,0.0780463993549347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,16,128,1,float16,float16,0,0.04731839895248413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,16,128,1,float16,fp8,0,0.008683200180530547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,16,128,1,float16,fp8,0,0.045326399803161624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,16,128,1,fp8,fp8,0,0.04534879922866821
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,1,128,1,float16,float16,0,0.04530560076236725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,1,128,1,fp8,fp8,0,0.04322080016136169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,2,128,1,float16,float16,0,0.04524320065975189
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,2,128,1,float16,fp8,0,0.04321439862251282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,2,128,1,fp8,fp8,0,0.04334079921245575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,4,128,1,float16,float16,0,0.045326399803161624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,16,8,128,1,float16,fp8,0,0.010299199819564819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,4,128,1,fp8,fp8,0,0.04325920045375824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,8,128,1,float16,float16,0,0.046998399496078494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,8,128,1,float16,fp8,0,0.043268799781799316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,8,128,1,fp8,fp8,0,0.04335840046405792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,16,128,1,float16,float16,0,0.027500799298286437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,16,128,1,float16,fp8,0,0.02694239914417267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,16,128,1,fp8,fp8,0,0.026900801062583923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,1,128,1,float16,float16,0,0.02687999904155731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,1,128,1,float16,fp8,0,0.026759999990463256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,1,128,1,fp8,fp8,0,0.026859200000762938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,2,128,1,float16,float16,0,0.026822400093078614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,2,128,1,float16,fp8,0,0.026756799221038817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,2,128,1,fp8,fp8,0,0.026732799410820008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,4,128,1,float16,float16,0,0.026980799436569215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,4,128,1,float16,fp8,0,0.026761600375175477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,4,128,1,fp8,fp8,0,0.02672159969806671
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,8,128,1,float16,float16,0,0.028830400109291075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,8,128,1,float16,fp8,0,0.02672159969806671
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,16,8,128,1,fp8,fp8,0,0.0268640011548996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,16,128,1,float16,float16,0,0.018811200559139252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,16,128,1,float16,fp8,0,0.018572799861431122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,16,128,1,fp8,fp8,0,0.018568000197410582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,1,128,1,float16,float16,0,0.018478399515151976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,1,128,1,float16,fp8,0,0.018607999384403228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,4,128,1,float16,fp8,0,0.0432671993970871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,1,128,1,fp8,fp8,0,0.018326400220394133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,2,128,1,float16,float16,0,0.018646399676799773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,2,128,1,float16,fp8,0,0.01852000057697296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,2,128,1,fp8,fp8,0,0.018638400733470915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,4,128,1,float16,float16,0,0.018588800728321076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,4,128,1,float16,fp8,0,0.01860959976911545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,4,128,1,fp8,fp8,0,0.0182096004486084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,8,128,1,float16,fp8,0,0.018462400138378143
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,8,128,1,fp8,fp8,0,0.018537600338459016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,16,128,1,float16,float16,0,0.014483200013637542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,16,128,1,float16,fp8,0,0.012630400061607362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,16,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,1,128,1,float16,float16,0,0.012643200159072877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,1,128,1,float16,fp8,0,0.012572799623012543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,1,128,1,fp8,fp8,0,0.012611199915409089
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,2,128,1,float16,float16,0,0.012777599692344665
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,2,128,1,float16,fp8,0,0.012559999525547028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,2,128,1,fp8,fp8,0,0.012572799623012543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,4,128,1,float16,float16,0,0.012540799379348756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,4,128,1,float16,fp8,0,0.012614400684833526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,4,128,1,fp8,fp8,0,0.012603199481964112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,8,128,1,float16,float16,0,0.013366399705410004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,16,1,128,1,float16,fp8,0,0.043300798535346983
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,8,128,1,float16,fp8,0,0.012591999769210816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,16,8,128,1,fp8,fp8,0,0.012596799433231354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,16,128,1,float16,float16,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,16,128,1,fp8,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,16,128,1,float16,fp8,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,1,128,1,float16,float16,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,1,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,2,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,2,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,2,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,4,128,1,float16,float16,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,4,128,1,float16,fp8,0,0.010339199751615524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,4,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,8,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,8,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,8,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,16,8,128,1,float16,float16,0,0.018615999817848207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,16,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,16,128,1,float16,fp8,0,0.010307200253009796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,16,128,1,fp8,fp8,0,0.01010880023241043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,1,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,1,128,1,fp8,fp8,0,0.008511999994516373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,2,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,2,128,1,float16,fp8,0,0.009091199934482574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,2,128,1,fp8,fp8,0,0.010331200063228607
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,4,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,4,128,1,float16,fp8,0,0.010332799702882766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,4,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,8,128,1,float16,float16,0,0.010355199873447418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,8,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,8,128,1,fp8,fp8,0,0.010294400155544281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,16,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,16,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,16,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,1,128,1,float16,float16,0,0.010308799892663955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,1,128,1,float16,fp8,0,0.008463999629020691
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,1,128,1,fp8,fp8,0,0.01032480001449585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,2,128,1,float16,float16,0,0.009393599629402161
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,2,128,1,float16,fp8,0,0.009537599980831146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,2,128,1,fp8,fp8,0,0.008390399813652038
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,4,128,1,float16,float16,0,0.010286399722099304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,4,128,1,float16,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,4,128,1,fp8,fp8,0,0.009547200053930283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,8,128,1,float16,float16,0,0.009404800087213516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,8,128,1,float16,fp8,0,0.009427200257778167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,16,8,128,1,fp8,fp8,0,0.008444800227880477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,16,128,1,float16,float16,0,0.010334400087594986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,16,128,1,float16,fp8,0,0.008488000184297562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,16,128,1,fp8,fp8,0,0.009123200178146362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,1,128,1,float16,float16,0,0.00835679993033409
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,1,128,1,float16,fp8,0,0.008459199965000153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,1,128,1,fp8,fp8,0,0.008463999629020691
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,2,128,1,float16,fp8,0,0.0087567999958992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,2,128,1,fp8,fp8,0,0.010339199751615524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,4,128,1,float16,float16,0,0.010340800136327743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,4,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,4,128,1,fp8,fp8,0,0.008454400300979614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,16,1,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,8,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,8,128,1,float16,fp8,0,0.008404800295829773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,16,8,128,1,fp8,fp8,0,0.008404800295829773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,0,0.0801360011100769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,0,0.07400320172309875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,1,128,1,fp8,fp8,0,0.0743503987789154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,0,0.08012319803237915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,0,0.07431679964065552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,16,1,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,0,0.08011360168457031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,0,0.07393919825553893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,4,128,1,fp8,fp8,0,0.07482560276985169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,0,0.08009600043296813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,0,0.07411999702453613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,8,128,1,fp8,fp8,0,0.07419679760932922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,0,0.04519839882850647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,0,0.042185598611831666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,16,128,1,fp8,fp8,0,0.04116159975528717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,0,0.04531840085983276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,0,0.04115679860115051
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,1,128,1,fp8,fp8,0,0.041238400340080264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,0,0.04381439983844757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,0,0.04162079989910126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,2,128,1,fp8,fp8,0,0.0411215990781784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,0,0.04524320065975189
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,0,0.041254401206970215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,4,128,1,fp8,fp8,0,0.04197440147399902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,0,0.04348799884319306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,0,0.0414112001657486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,16,8,128,1,fp8,fp8,0,0.04113920032978058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,0,0.026851201057434083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,16,128,1,fp8,fp8,0,0.02479040026664734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,0,0.02683199942111969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,0,0.02486719936132431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,1,128,1,fp8,fp8,0,0.024771200120449068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,0,0.026804798841476442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,2,128,1,fp8,fp8,0,0.024857600033283234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,0,0.0248416006565094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,0,0.024809600412845613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,0,0.02688960134983063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,4,128,1,fp8,fp8,0,0.02481919974088669
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,0,0.026878398656845093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,0,0.024868799746036528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,16,8,128,1,fp8,fp8,0,0.024804799258708952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,0,0.01870719939470291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,0,0.01658879965543747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,16,128,1,fp8,fp8,0,0.016579200327396394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,0,0.01852319985628128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,16,2,128,1,fp8,fp8,0,0.07391520142555237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,0,0.016678400337696075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,1,128,1,fp8,fp8,0,0.016595199704170227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,0,0.018508799374103546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,0,0.016620799899101257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,2,128,1,fp8,fp8,0,0.016676799952983858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,0,0.018614399433135986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,0,0.016652800142765045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,4,128,1,fp8,fp8,0,0.01672479957342148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,0,0.018588800728321076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,0,0.016564799845218657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,16,8,128,1,fp8,fp8,0,0.016711999475955964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,0,0.012956799566745758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,0,0.012452799826860428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,16,128,1,fp8,fp8,0,0.012566399574279786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,1,128,1,fp8,fp8,0,0.012598399817943574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,0,0.012540799379348756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,2,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,0,0.012544000148773193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,4,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,0,0.014083200693130493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,16,8,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,16,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,1,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,2,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,8,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,0,0.008454400300979614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,16,128,1,fp8,fp8,0,0.01032159999012947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,1,128,1,fp8,fp8,0,0.009387200325727462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,0,0.010360000282526016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,2,128,1,fp8,fp8,0,0.010339199751615524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,0,0.009404800087213516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,4,128,1,fp8,fp8,0,0.009643200039863586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,16,8,128,1,fp8,fp8,0,0.010337600111961364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,0,0.008472000062465668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,16,128,1,fp8,fp8,0,0.008448000252246856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,0,0.0084927998483181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,0,0.008367999643087386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,1,128,1,fp8,fp8,0,0.008444800227880477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,0,0.00870240032672882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,2,128,1,fp8,fp8,0,0.008470399677753449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,0,0.008396799862384795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,4,128,1,fp8,fp8,0,0.008459199965000153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,0,0.00841120034456253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,16,8,128,1,fp8,fp8,0,0.008353599905967712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,0,0.008377599716186523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,16,4,128,1,fp8,fp8,0,0.010688000172376633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,0,0.008723200112581254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,16,128,1,fp8,fp8,0,0.008374399691820144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,0,0.008364800363779068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,1,128,1,fp8,fp8,0,0.00838399976491928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,0,0.00910400003194809
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,0,0.0083856001496315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,2,128,1,fp8,fp8,0,0.008479999750852585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,0,0.008463999629020691
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,0,0.009489600360393525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,4,128,1,fp8,fp8,0,0.008497600257396699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,0,0.008537600189447403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,8,128,1,fp8,fp8,0,0.008454400300979614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,0,0.010003200173377991
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,float16,0,3.7306175231933594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,fp8,0,3.0275392532348633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,12,1,128,1,fp8,fp8,0,3.045947265625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,float16,0,3.585084915161133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,12,2,128,1,fp8,fp8,0,3.037182426452637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,float16,0,4.04192008972168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,fp8,0,3.2533519744873045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,fp8,0,3.114838409423828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,12,4,128,1,fp8,fp8,0,3.047171211242676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,float16,0,1.8349903106689454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,fp8,0,1.6390480041503905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,fp8,0,1.7736127853393555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,1,128,1,fp8,fp8,0,1.5602687835693358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,float16,0,1.8435903549194337
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,fp8,0,1.5640031814575195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,2,128,1,fp8,fp8,0,1.6000944137573243
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,12,128,1,fp8,fp8,0,1.6380016326904296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,float16,0,1.8672943115234375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,fp8,0,1.5639455795288086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,4,128,1,fp8,fp8,0,1.5672623634338378
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,fp8,0,0.8661567687988281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,12,128,1,fp8,fp8,0,0.8672863960266113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,fp8,0,0.8252832412719726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,float16,0,0.9646559715270996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,1,128,1,fp8,fp8,0,0.8249648094177247
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,float16,0,0.969660758972168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,fp8,0,0.8254336357116699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,2,128,1,fp8,fp8,0,0.8469391822814941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,float16,0,0.988430404663086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,fp8,0,0.9052000045776367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,4,128,1,fp8,fp8,0,0.8366928100585938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,fp8,0,0.48044958114624026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,12,128,1,fp8,fp8,0,0.4808032035827637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,float16,0,0.5311344146728516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,fp8,0,0.4622767925262451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,1,128,1,fp8,fp8,0,0.4558080196380615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,float16,0,0.5333312034606934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,fp8,0,0.4566256046295166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,2,128,1,fp8,fp8,0,0.45664639472961427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,float16,0,0.5375743865966797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,fp8,0,0.46262240409851074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,4,128,1,fp8,fp8,0,0.4573791980743408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,float16,0,2.114308738708496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,fp8,0,1.9621488571166992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,12,1,128,1,fp8,fp8,0,1.8162864685058593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,float16,0,2.12945442199707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,fp8,0,1.988915252685547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,12,2,128,1,fp8,fp8,0,1.8182928085327148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,float16,0,2.190500831604004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,fp8,0,1.8967727661132812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,12,4,128,1,fp8,fp8,0,1.8146928787231444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,float16,0,2.0521392822265625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,float16,0,1.0237279891967774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,fp8,0,1.0411680221557618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,12,128,1,fp8,fp8,0,1.0385168075561524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,float16,0,0.5674704074859619
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,float16,0,1.1227919578552246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,fp8,0,0.950107192993164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,1,128,1,fp8,fp8,0,0.9381567955017089
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,float16,0,1.1303584098815918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,fp8,0,0.9381744384765625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,2,128,1,fp8,fp8,0,0.938593578338623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,float16,0,1.1512592315673829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,fp8,0,0.9411392211914062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,float16,0,0.6284912109375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,4,128,1,fp8,fp8,0,1.079576015472412
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,float16,0,1.1964192390441895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,fp8,0,0.5364927768707275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,12,128,1,fp8,fp8,0,0.6121263980865479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,float16,0,0.5820784091949462
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,fp8,0,0.507747220993042
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,1,128,1,fp8,fp8,0,0.5055471897125244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,fp8,0,0.5061088085174561
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,2,128,1,fp8,fp8,0,0.5069007873535156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,float16,0,0.5896399974822998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,fp8,0,0.5066304206848145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,4,128,1,fp8,fp8,0,0.5049983978271484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,float16,0,0.3616015911102295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,fp8,0,0.3043296098709106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,12,128,1,fp8,fp8,0,0.3048543930053711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,float16,0,0.32072160243988035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,1,128,1,fp8,fp8,0,0.28658719062805177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,float16,0,0.3220031976699829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,fp8,0,0.28700640201568606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,2,128,1,fp8,fp8,0,0.2864831924438477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,float16,0,0.5841728210449219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,float16,0,0.32861919403076173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,fp8,0,0.2861216068267822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,4,128,1,fp8,fp8,0,0.28681440353393556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,fp8,0,1.3100383758544922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,float16,0,1.4837663650512696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,12,1,128,1,fp8,fp8,0,1.309447956085205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,fp8,0,0.2871119976043701
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,float16,0,1.4889151573181152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,fp8,0,1.474772834777832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,12,2,128,1,fp8,fp8,0,1.3137519836425782
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,float16,0,1.6334815979003907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,float16,0,0.8744159698486328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,fp8,0,1.3170111656188965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,12,4,128,1,fp8,fp8,0,1.313905620574951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,fp8,0,0.8201616287231446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,12,128,1,fp8,fp8,0,0.7747663974761962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,float16,0,0.7717088222503662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,fp8,0,0.6894832134246827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,1,128,1,fp8,fp8,0,0.7181488037109375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,float16,0,0.7958752155303955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,fp8,0,0.6920639991760253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,2,128,1,fp8,fp8,0,0.6865392208099366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,float16,0,0.8230208396911621
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,fp8,0,0.6965072154998779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,float16,0,0.46825599670410156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,12,4,128,1,fp8,fp8,0,0.7327888011932373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,fp8,0,0.4029967784881592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,12,128,1,fp8,fp8,0,0.402188777923584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,float16,0,0.41785922050476076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,fp8,0,0.39707839488983154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,1,128,1,fp8,fp8,0,0.37469120025634767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,float16,0,0.42472000122070314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,2,128,1,fp8,fp8,0,0.37242400646209717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,float16,0,0.4401887893676758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,fp8,0,0.37321760654449465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,4,128,1,fp8,fp8,0,0.37363519668579104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,float16,0,0.27234079837799074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,fp8,0,0.22975358963012696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,12,128,1,fp8,fp8,0,0.22978880405426025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,float16,0,0.24363360404968262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,fp8,0,0.21708641052246094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,1,128,1,fp8,fp8,0,0.21588799953460694
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,float16,0,0.24442241191864014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,fp8,0,0.2164815902709961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,2,128,1,fp8,fp8,0,0.2155776023864746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,float16,0,0.248473596572876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,fp8,0,0.21674079895019532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,12,4,128,1,fp8,fp8,0,0.21680800914764403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,fp8,0,1.7342336654663086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,fp8,0,0.3733520030975342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,12,1,128,1,fp8,fp8,0,1.735468864440918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,float16,0,2.0147504806518555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,fp8,0,1.7322959899902344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,12,2,128,1,fp8,fp8,0,1.7393024444580079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,float16,0,1.9690128326416017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,float16,0,2.157212829589844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,float16,0,1.2013824462890625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,fp8,0,1.1435487747192383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,fp8,0,1.7376895904541017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,12,4,128,1,fp8,fp8,0,1.7446575164794922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,12,128,1,fp8,fp8,0,1.0495759963989257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,fp8,0,0.8921615600585937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,1,128,1,fp8,fp8,0,0.8935135841369629
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,float16,0,1.0111231803894043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,fp8,0,0.8930576324462891
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,2,128,1,fp8,fp8,0,0.8929776191711426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,fp8,0,0.8957360267639161
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,float16,0,1.0405072212219237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,4,128,1,fp8,fp8,0,0.9000592231750488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,fp8,0,0.5340528011322021
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,float16,0,0.5973104000091553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,fp8,0,0.47177600860595703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,float16,0,1.058080005645752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,1,128,1,fp8,fp8,0,0.4717775821685791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,float16,0,0.5421440124511718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,fp8,0,0.47261438369750974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,2,128,1,fp8,fp8,0,0.4739823818206787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,float16,0,0.5538303852081299
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,fp8,0,0.48052639961242677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,12,128,1,fp8,fp8,0,0.5126207828521728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,float16,0,0.32753279209136965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,float16,0,0.5282383918762207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,fp8,0,0.2830303907394409
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,12,128,1,fp8,fp8,0,0.2837552070617676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,float16,0,0.2867327928543091
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,fp8,0,0.2604448080062866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,1,128,1,fp8,fp8,0,0.26064960956573485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,float16,0,0.29126079082489015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,fp8,0,0.26048319339752196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,2,128,1,fp8,fp8,0,0.2619424104690552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,float16,0,0.29546399116516114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,fp8,0,0.2617871999740601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,12,4,128,1,fp8,fp8,0,0.2617023944854736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,float16,0,0.19415680170059205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,fp8,0,0.16391199827194214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,12,128,1,fp8,fp8,0,0.1641535997390747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,float16,0,0.16781280040740967
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,fp8,0,0.15381120443344115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,1,128,1,fp8,fp8,0,0.1539728045463562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,float16,0,0.1684831976890564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,fp8,0,0.15381280183792115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,2,128,1,fp8,fp8,0,0.1544111967086792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,float16,0,0.1699344038963318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,12,4,128,1,fp8,fp8,0,0.4752768039703369
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,fp8,0,0.15354080200195314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,12,4,128,1,fp8,fp8,0,0.15467679500579834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,12,1,128,1,fp8,fp8,0,1.0614543914794923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,float16,0,1.148798370361328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,float16,0,1.1699952125549316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,fp8,0,1.0633888244628906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,12,2,128,1,fp8,fp8,0,1.064249610900879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,float16,0,1.2232447624206544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,fp8,0,1.072545623779297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,float16,0,0.7275728225708008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,12,4,128,1,fp8,fp8,0,1.0780192375183106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,fp8,0,0.6102176189422608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,12,128,1,fp8,fp8,0,0.6288752079010009
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,float16,0,0.5942543983459473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,fp8,0,1.0648240089416503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,fp8,0,0.5513792037963867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,1,128,1,fp8,fp8,0,0.5544064044952393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,float16,0,0.6002463817596435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,fp8,0,0.5541024208068848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,2,128,1,fp8,fp8,0,0.5521967887878418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,float16,0,0.6260079860687255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,fp8,0,0.5551199913024902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,float16,0,0.375164794921875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,fp8,0,0.33036959171295166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,12,4,128,1,fp8,fp8,0,0.5563072204589844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,12,128,1,fp8,fp8,0,0.3284735918045044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,float16,0,0.3185551881790161
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,fp8,0,0.2980319976806641
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,1,128,1,fp8,fp8,0,0.29749600887298583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,float16,0,0.3205696105957031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,fp8,0,0.2966687917709351
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,2,128,1,fp8,fp8,0,0.297107195854187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,float16,0,0.3421583890914917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,fp8,0,0.29790239334106444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,12,4,128,1,fp8,fp8,0,0.2973839998245239
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,fp8,0,0.18512320518493652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,12,128,1,fp8,fp8,0,0.18498719930648805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,fp8,0,0.16870559453964235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,float16,0,0.17815519571304322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,1,128,1,fp8,fp8,0,0.16876640319824218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,float16,0,0.18467040061950685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,fp8,0,0.16800639629364014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,2,128,1,fp8,fp8,0,0.16841440200805663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,float16,0,0.19120479822158815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,fp8,0,0.16767200231552123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,float16,0,0.12417440414428711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,4,128,1,fp8,fp8,0,0.16783360242843628
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,12,128,1,fp8,fp8,0,0.11304320096969604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,float16,0,0.11312960386276245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,fp8,0,0.10664479732513428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,1,128,1,fp8,fp8,0,0.10691839456558228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,float16,0,0.11435519456863404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,fp8,0,0.10675679445266724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,2,128,1,fp8,fp8,0,0.10663039684295654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,float16,0,0.11542400121688842
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,fp8,0,0.10639679431915283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,4,128,1,fp8,fp8,0,0.10671360492706299
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,float16,0,1.1319744110107421
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,float16,0,0.2132944107055664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,fp8,0,1.0657872200012206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,12,1,128,1,fp8,fp8,0,1.139566421508789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,fp8,0,0.1127519965171814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,float16,0,1.1195679664611817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,12,2,128,1,fp8,fp8,0,1.0718400001525878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,fp8,0,1.0721920013427735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,float16,0,1.1620752334594726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,fp8,0,0.6269392013549805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,float16,0,0.7109839916229248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,12,4,128,1,fp8,fp8,0,1.0749551773071289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,float16,0,0.5728271961212158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,12,128,1,fp8,fp8,0,0.627020788192749
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,fp8,0,0.5515583992004395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,1,128,1,fp8,fp8,0,0.5502560138702393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,float16,0,0.5724368095397949
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,fp8,0,1.0684528350830078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,2,128,1,fp8,fp8,0,0.549892807006836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,float16,0,0.5979135990142822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,fp8,0,0.5524735927581788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,4,128,1,fp8,fp8,0,0.5519216060638428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,float16,0,0.37047998905181884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,fp8,0,0.3304064035415649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,12,128,1,fp8,fp8,0,0.3303312063217163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,float16,0,0.29950718879699706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,fp8,0,0.28994240760803225
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,1,128,1,fp8,fp8,0,0.29041759967803954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,float16,0,0.30179998874664304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,fp8,0,0.29107840061187745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,2,128,1,fp8,fp8,0,0.2902944087982178
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,float16,0,0.31680319309234617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,fp8,0,0.2914367914199829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,12,4,128,1,fp8,fp8,0,0.2917952060699463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,float16,0,0.20300159454345704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,fp8,0,0.18218079805374146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,float16,0,0.1663632035255432
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,fp8,0,0.15992159843444825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,1,128,1,fp8,fp8,0,0.1600592017173767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,float16,0,0.1669968008995056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,fp8,0,0.16028800010681152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,2,128,1,fp8,fp8,0,0.16106560230255126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,float16,0,0.17497440576553344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,fp8,0,0.1619488000869751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,4,128,1,fp8,fp8,0,0.1617616057395935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,float16,0,0.11797759532928467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,fp8,0,0.10449600219726562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,12,128,1,fp8,fp8,0,0.10479680299758912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,float16,0,0.0983568012714386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,fp8,0,0.09527519941329957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,1,128,1,fp8,fp8,0,0.09493759870529175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,float16,0,0.09866399765014648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,fp8,0,0.09574559926986695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,2,128,1,fp8,fp8,0,0.09538080096244812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,float16,0,0.10057599544525146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,fp8,0,0.0949504017829895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,fp8,0,0.550822401046753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,12,4,128,1,fp8,fp8,0,0.09557759761810303
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,float16,0,0.06864479780197144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,fp8,0,0.0658527970314026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,12,12,128,1,fp8,fp8,0,0.18159040212631225
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,float16,0,0.06604959964752197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,fp8,0,0.0616208016872406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,1,128,1,fp8,fp8,0,0.061654400825500486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,float16,0,0.06592320203781128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,fp8,0,0.0625216007232666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,2,128,1,fp8,fp8,0,0.06164640188217163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,fp8,0,0.061603200435638425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,float16,0,0.06762880086898804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,4,128,1,fp8,fp8,0,0.061703997850418094
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,float16,0,0.6821807861328125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,fp8,0,0.6815072059631347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,12,1,128,1,fp8,fp8,0,0.6794608116149903
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,float16,0,0.6907343864440918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,fp8,0,0.6821663856506348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,12,2,128,1,fp8,fp8,0,0.680844783782959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,float16,0,0.7291135787963867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,12,4,128,1,fp8,fp8,0,0.6831439971923828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,float16,0,0.45174241065979004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,12,12,128,1,fp8,fp8,0,0.06675680279731751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,12,128,1,fp8,fp8,0,0.4108096122741699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,float16,0,0.3517119884490967
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,fp8,0,0.3529247999191284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,1,128,1,fp8,fp8,0,0.3528575897216797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,float16,0,0.3586863994598389
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,fp8,0,0.35334560871124265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,2,128,1,fp8,fp8,0,0.3532576084136963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,float16,0,0.3763904094696045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,fp8,0,0.35521440505981444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,4,128,1,fp8,fp8,0,0.35539200305938723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,float16,0,0.24035360813140869
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,fp8,0,0.2195199966430664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,12,128,1,fp8,fp8,0,0.21923360824584961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,fp8,0,0.6845039844512939
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,float16,0,0.18987200260162354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,fp8,0,0.18925119638442994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,1,128,1,fp8,fp8,0,0.19041119813919066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,float16,0,0.19160319566726686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,fp8,0,0.41044001579284667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,2,128,1,fp8,fp8,0,0.19073760509490967
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,float16,0,0.20315999984741212
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,fp8,0,0.19064320325851442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,4,128,1,fp8,fp8,0,0.19104000329971313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,float16,0,0.13483680486679078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,fp8,0,0.12312959432601929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,12,128,1,fp8,fp8,0,0.1231152057647705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,float16,0,0.10686719417572021
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,fp8,0,0.10663039684295654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,1,128,1,fp8,fp8,0,0.10700800418853759
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,float16,0,0.1072335958480835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,fp8,0,0.10688480138778686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,2,128,1,fp8,fp8,0,0.10705920457839965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,float16,0,0.11317600011825561
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,4,128,1,fp8,fp8,0,0.1072111964225769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,float16,0,0.07825279831886292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,fp8,0,0.07390879988670349
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,12,128,1,fp8,fp8,0,0.07389439940452576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,float16,0,0.06903679966926575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,fp8,0,0.06714400053024291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,1,128,1,fp8,fp8,0,0.06733440160751343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,float16,0,0.068476802110672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,fp8,0,0.06762719750404358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,2,128,1,fp8,fp8,0,0.06683359742164612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,float16,0,0.06999040246009827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,fp8,0,0.06690719723701477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,12,4,128,1,fp8,fp8,0,0.06753919720649719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,fp8,0,0.19039839506149292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,float16,0,0.04501599967479706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,fp8,0,0.04326080083847046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,12,128,1,fp8,fp8,0,0.04429439902305603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,float16,0,0.04273119866847992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,fp8,0,0.041289600729942325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,1,128,1,fp8,fp8,0,0.04121440052986145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,float16,0,0.04256480038166046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,fp8,0,0.04113599956035614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,2,128,1,fp8,fp8,0,0.04124319851398468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,float16,0,0.043219199776649474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,fp8,0,0.0412992000579834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,fp8,0,0.10678880214691162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,float16,0,0.6977071762084961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,fp8,0,0.7259632110595703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,12,1,128,1,fp8,fp8,0,0.7278927803039551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,float16,0,0.6939343929290771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,fp8,0,0.7292384147644043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,12,2,128,1,fp8,fp8,0,0.7276527881622314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,float16,0,0.7596608161926269
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,fp8,0,0.7300191879272461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,12,4,128,1,fp8,fp8,0,0.7296527862548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,float16,0,0.48053441047668455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,fp8,0,0.44853601455688474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,float16,0,0.36109280586242676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,fp8,0,0.3733488082885742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,12,128,1,fp8,fp8,0,0.44863200187683105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,1,128,1,fp8,fp8,0,0.37497758865356445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,fp8,0,0.3747888088226318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,2,128,1,fp8,fp8,0,0.37625119686126707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,float16,0,0.38663039207458494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,fp8,0,0.3776144027709961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,4,128,1,fp8,fp8,0,0.37591679096221925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,float16,0,0.2525520086288452
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,fp8,0,0.2358288049697876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,12,128,1,fp8,fp8,0,0.23693599700927734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,float16,0,0.18939039707183838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,fp8,0,0.19836800098419188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,1,128,1,fp8,fp8,0,0.1971295952796936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,12,4,128,1,fp8,fp8,0,0.041126400232315063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,float16,0,0.19383039474487304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,fp8,0,0.1974063992500305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,2,128,1,fp8,fp8,0,0.19762879610061646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,float16,0,0.2091775894165039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,fp8,0,0.19890559911727906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,12,4,128,1,fp8,fp8,0,0.19871840476989747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,float16,0,0.1380576014518738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,fp8,0,0.1295088052749634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,float16,0,0.10648959875106812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,fp8,0,0.10800960063934326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,1,128,1,fp8,fp8,0,0.10898879766464234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,float16,0,0.10713759660720826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,fp8,0,0.10846400260925293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,2,128,1,fp8,fp8,0,0.1084015965461731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,float16,0,0.11512160301208496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,4,128,1,fp8,fp8,0,0.10916320085525513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,float16,0,0.08005279898643494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,fp8,0,0.07304800152778626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,12,128,1,fp8,fp8,0,0.07411040067672729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,float16,0,0.06424639821052551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,fp8,0,0.06454240083694458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,1,128,1,fp8,fp8,0,0.06458560228347779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,float16,0,0.06424160003662109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,fp8,0,0.06505759954452514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,2,128,1,fp8,fp8,0,0.06446560025215149
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,float16,0,0.06625599861145019
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,fp8,0,0.06479679942131042
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,12,4,128,1,fp8,fp8,0,0.06498240232467652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,12,128,1,fp8,fp8,0,0.12887359857559205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,float16,0,0.047040000557899475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,fp8,0,0.04724319875240326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,12,128,1,fp8,fp8,0,0.04732959866523743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,float16,0,0.043263998627662656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,fp8,0,0.0433023989200592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,1,128,1,fp8,fp8,0,0.043171200156211856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,fp8,0,0.04318560063838959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,float16,0,0.35857439041137695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,2,128,1,fp8,fp8,0,0.043219199776649474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,float16,0,0.044782400131225586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,fp8,0,0.10895839929580689
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,fp8,0,0.04318720102310181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,4,128,1,fp8,fp8,0,0.043156799674034116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,float16,0,0.03304480016231537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,fp8,0,0.032953599095344545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,12,128,1,fp8,fp8,0,0.032950401306152344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,float16,0,0.03102880120277405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,fp8,0,0.030895999073982237
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,1,128,1,fp8,fp8,0,0.03094240128993988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,float16,0,0.030995199084281923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,fp8,0,0.030884799361228944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,2,128,1,fp8,fp8,0,0.030990400910377504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,float16,0,0.03295679986476898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,fp8,0,0.030870398879051207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,12,4,128,1,fp8,fp8,0,0.03126400113105774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,float16,0,0.44374399185180663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,fp8,0,0.4845088005065918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,12,1,128,1,fp8,fp8,0,0.48471522331237793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,float16,0,0.45200481414794924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,fp8,0,0.48575677871704104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,12,2,128,1,fp8,fp8,0,0.4854144096374512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,float16,0,0.487713623046875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,fp8,0,0.48809118270874025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,float16,0,0.044014400243759154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,float16,0,0.31950399875640867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,12,4,128,1,fp8,fp8,0,0.4870783805847168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,fp8,0,0.3079648017883301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,12,128,1,fp8,fp8,0,0.3075376033782959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,float16,0,0.23186080455780028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,fp8,0,0.25266399383544924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,1,128,1,fp8,fp8,0,0.2520992040634155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,float16,0,0.23577439785003662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,fp8,0,0.2525583982467651
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,2,128,1,fp8,fp8,0,0.2519792079925537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,float16,0,0.2546688079833984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,fp8,0,0.25385921001434325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,12,4,128,1,fp8,fp8,0,0.2536623954772949
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,fp8,0,0.16395039558410646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,12,128,1,fp8,fp8,0,0.16403839588165284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,float16,0,0.12761119604110718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,fp8,0,0.13513120412826538
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,float16,0,0.1272447943687439
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,fp8,0,0.1350767970085144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,2,128,1,fp8,fp8,0,0.13525279760360717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,float16,0,0.13788000345230103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,fp8,0,0.13546400070190429
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,4,128,1,fp8,fp8,0,0.13564480543136598
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,float16,0,0.09463359713554383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,fp8,0,0.09095519781112671
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,12,128,1,fp8,fp8,0,0.09042559862136841
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,float16,0,0.07194079756736756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,fp8,0,0.07575039863586426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,1,128,1,fp8,fp8,0,0.07601760029792785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,fp8,0,0.07602880001068116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,2,128,1,fp8,fp8,0,0.07604320049285888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,float16,0,0.07805920243263245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,fp8,0,0.07606239914894104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,4,128,1,fp8,fp8,0,0.07618719935417176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,float16,0,0.05430240035057068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,float16,0,0.16997120380401612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,fp8,0,0.053504002094268796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,12,128,1,fp8,fp8,0,0.053192001581192014
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,float16,0,0.04549280107021332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,fp8,0,0.046833598613739015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,1,128,1,fp8,fp8,0,0.046935999393463136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,float16,0,0.04530400037765503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,12,1,128,1,fp8,fp8,0,0.1348207950592041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,fp8,0,0.04681920111179352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,2,128,1,fp8,fp8,0,0.04669919908046723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,float16,0,0.04751839935779571
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,4,128,1,fp8,fp8,0,0.04731360077857971
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,float16,0,0.03102880120277405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,fp8,0,0.03293919861316681
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,12,128,1,fp8,fp8,0,0.0329039990901947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,float16,0,0.02889760136604309
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,fp8,0,0.028915199637413024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,1,128,1,fp8,fp8,0,0.02876160144805908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,float16,0,0.02921440005302429
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,fp8,0,0.02886880040168762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,2,128,1,fp8,fp8,0,0.028916800022125246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,float16,0,0.028916800022125246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,fp8,0,0.028947201371192933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,12,4,128,1,fp8,fp8,0,0.028860801458358766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,float16,0,0.028737598657608034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,fp8,0,0.026953598856925963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,float16,0,0.02680320143699646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,12,128,1,fp8,fp8,0,0.02886880040168762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,fp8,0,0.026736000180244447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,1,128,1,fp8,fp8,0,0.026743999123573302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,float16,0,0.07221119999885559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,float16,0,0.02678399980068207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,fp8,0,0.026804798841476442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,2,128,1,fp8,fp8,0,0.02683840095996857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,fp8,0,0.026811200380325317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,4,128,1,fp8,fp8,0,0.026840001344680786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,float16,0,0.48806238174438477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,fp8,0,0.5533487796783447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,12,1,128,1,fp8,fp8,0,0.5541744232177734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,float16,0,0.4951903820037842
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,fp8,0,0.047310400009155276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,fp8,0,0.5552720069885254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,12,2,128,1,fp8,fp8,0,0.5546495914459229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,float16,0,0.5386544227600097
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,fp8,0,0.5569295883178711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,12,4,128,1,fp8,fp8,0,0.5565360069274903
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,float16,0,0.36101279258728025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,fp8,0,0.3579296112060547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,12,128,1,fp8,fp8,0,0.3569247961044312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,float16,0,0.2526655912399292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,float16,0,0.02683840095996857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,fp8,0,0.2852015972137451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,1,128,1,fp8,fp8,0,0.2849008083343506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,float16,0,0.25630080699920654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,fp8,0,0.28622400760650635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,2,128,1,fp8,fp8,0,0.28505759239196776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,float16,0,0.2781872034072876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,fp8,0,0.28693599700927735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,12,4,128,1,fp8,fp8,0,0.286080002784729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,float16,0,0.18988319635391235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,fp8,0,0.1879647970199585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,float16,0,0.13551199436187744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,fp8,0,0.15098240375518798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,1,128,1,fp8,fp8,0,0.1503551959991455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,float16,0,0.13879679441452025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,fp8,0,0.150382399559021
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,2,128,1,fp8,fp8,0,0.15086400508880615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,float16,0,0.1479472041130066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,fp8,0,0.15172640085220337
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,4,128,1,fp8,fp8,0,0.15156480073928832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,float16,0,0.10440000295639038
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,fp8,0,0.10206880569458007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,12,128,1,fp8,fp8,0,0.10255680084228516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,float16,0,0.07634720206260681
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,fp8,0,0.08230080008506775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,1,128,1,fp8,fp8,0,0.0822048008441925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,float16,0,0.07810879945755005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,fp8,0,0.08223199844360352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,2,128,1,fp8,fp8,0,0.0826799988746643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,float16,0,0.0831712007522583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,fp8,0,0.08327999711036682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,12,4,128,1,fp8,fp8,0,0.08280799984931946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,float16,0,0.06118879914283752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,fp8,0,0.05779359936714172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,12,128,1,fp8,fp8,0,0.0591759979724884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,float16,0,0.0467631995677948
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,fp8,0,0.04939680099487305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,1,128,1,fp8,fp8,0,0.04925119876861572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,float16,0,0.04771519899368286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,fp8,0,0.049214398860931395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,2,128,1,fp8,fp8,0,0.04943360090255737
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,float16,0,0.04824320077896118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,fp8,0,0.049377599358558656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,12,12,128,1,fp8,fp8,0,0.18700319528579712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,12,4,128,1,fp8,fp8,0,0.04919840097427368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,float16,0,0.0350735992193222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,fp8,0,0.03704319894313812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,12,128,1,fp8,fp8,0,0.036985599994659425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,float16,0,0.03279680013656616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,fp8,0,0.03282400071620941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,1,128,1,fp8,fp8,0,0.03293280005455017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,float16,0,0.032950401306152344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,fp8,0,0.03293280005455017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,2,128,1,fp8,fp8,0,0.032872000336647035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,float16,0,0.032971200346946714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,4,128,1,fp8,fp8,0,0.03302879929542542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,float16,0,0.024748800694942473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,12,128,1,fp8,fp8,0,0.024881599843502043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,float16,0,0.022809599339962006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,fp8,0,0.022832000255584718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,1,128,1,fp8,fp8,0,0.022819200158119203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,float16,0,0.022833600640296936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,fp8,0,0.02276960015296936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,float16,0,0.024158400297164918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,fp8,0,0.02279839962720871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,4,128,1,fp8,fp8,0,0.022814400494098663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,float16,0,0.02276639938354492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,fp8,0,0.02274080067873001
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,12,128,1,fp8,fp8,0,0.022793599963188173
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,float16,0,0.0226160004734993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,fp8,0,0.022551999986171724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,1,128,1,fp8,fp8,0,0.022673599421977997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,float16,0,0.022679999470710754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,fp8,0,0.022579200565814972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,2,128,1,fp8,fp8,0,0.021648000180721282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,float16,0,0.022635200619697572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,fp8,0,0.02221119999885559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,12,4,128,1,fp8,fp8,0,0.021513600647449494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,fp8,0,0.032927998900413515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,12,1,128,1,float16,float16,0,0.3887295961380005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,fp8,0,0.024959999322891235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,12,1,128,1,float16,fp8,0,0.4643519878387451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,12,2,128,1,float16,float16,0,0.39289920330047606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,12,2,128,1,fp8,fp8,0,0.023180800676345825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,12,2,128,1,fp8,fp8,0,0.4645792007446289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,12,4,128,1,float16,float16,0,0.4327807903289795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,12,4,128,1,float16,fp8,0,0.4654560089111328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,12,4,128,1,fp8,fp8,0,0.4665359973907471
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,12,128,1,float16,float16,0,0.3044080018997192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,12,128,1,float16,fp8,0,0.3088047981262207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,12,128,1,fp8,fp8,0,0.3087440013885498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,1,128,1,float16,float16,0,0.20227839946746826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,1,128,1,float16,fp8,0,0.23957760334014894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,1,128,1,fp8,fp8,0,0.23996639251708984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,2,128,1,float16,float16,0,0.20369439125061034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,2,128,1,float16,fp8,0,0.23952159881591797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,2,128,1,fp8,fp8,0,0.23972320556640625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,4,128,1,float16,float16,0,0.223852801322937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,4,128,1,float16,fp8,0,0.2401808023452759
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,12,1,128,1,fp8,fp8,0,0.4651679992675781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,12,4,128,1,fp8,fp8,0,0.2398655891418457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,12,128,1,float16,float16,0,0.15996639728546141
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,12,128,1,float16,fp8,0,0.16251840591430664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,12,2,128,1,float16,fp8,0,0.4639887809753418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,1,128,1,float16,float16,0,0.1092960000038147
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,12,128,1,fp8,fp8,0,0.1620352029800415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,1,128,1,float16,fp8,0,0.12604000568389892
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,1,128,1,fp8,fp8,0,0.1272447943687439
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,2,128,1,float16,fp8,0,0.1269711971282959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,2,128,1,fp8,fp8,0,0.12727359533309937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,4,128,1,float16,float16,0,0.11940799951553345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,4,128,1,float16,fp8,0,0.12744799852371216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,4,128,1,fp8,fp8,0,0.12721760272979737
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,12,128,1,float16,float16,0,0.08820160031318665
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,12,128,1,float16,fp8,0,0.08791999816894532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,12,128,1,fp8,fp8,0,0.08868640065193176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,1,128,1,float16,float16,0,0.06144160032272339
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,1,128,1,fp8,fp8,0,0.06856160163879395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,1,128,1,float16,fp8,0,0.06962400078773498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,2,128,1,float16,float16,0,0.06257439851760864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,2,128,1,float16,fp8,0,0.06969919800758362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,2,128,1,fp8,fp8,0,0.06974080204963684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,4,128,1,float16,float16,0,0.06790080070495605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,4,128,1,float16,fp8,0,0.06962239742279053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,12,4,128,1,fp8,fp8,0,0.0699072003364563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,12,128,1,float16,float16,0,0.05057600140571594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,12,128,1,float16,fp8,0,0.05034400224685669
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,12,128,1,fp8,fp8,0,0.049472001194953916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,1,128,1,float16,float16,0,0.03751679956912994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,1,128,1,float16,fp8,0,0.04113599956035614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,1,128,1,fp8,fp8,0,0.04121119976043701
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,2,128,1,float16,float16,0,0.03716639876365661
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,2,128,1,float16,fp8,0,0.041223999857902524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,2,128,1,fp8,fp8,0,0.041116800904273984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,4,128,1,float16,float16,0,0.040092799067497256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,4,128,1,float16,fp8,0,0.04114879965782166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,12,4,128,1,fp8,fp8,0,0.04119200110435486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,12,128,1,float16,float16,0,0.02922079861164093
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,12,128,1,float16,fp8,0,0.03299039900302887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,12,128,1,fp8,fp8,0,0.03287360072135925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,1,128,1,float16,float16,0,0.026947200298309326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,1,128,1,float16,fp8,0,0.02873600125312805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,1,128,1,fp8,fp8,0,0.028863999247550964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,2,128,1,float16,float16,0,0.02682879865169525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,2,128,1,float16,fp8,0,0.02884320020675659
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,12,2,128,1,float16,float16,0,0.10969439744949341
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,4,128,1,float16,float16,0,0.028436800837516783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,4,128,1,float16,fp8,0,0.028854399919509888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,4,128,1,fp8,fp8,0,0.02866879999637604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,12,128,1,float16,float16,0,0.020528000593185425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,12,128,1,float16,fp8,0,0.02084160000085831
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,12,128,1,fp8,fp8,0,0.020875200629234314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,1,128,1,float16,float16,0,0.018587200343608855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,1,128,1,float16,fp8,0,0.018753600120544434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,1,128,1,fp8,fp8,0,0.01870719939470291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,2,128,1,float16,float16,0,0.018617600202560425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,2,128,1,float16,fp8,0,0.018755200505256652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,2,128,1,fp8,fp8,0,0.018780800700187682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,4,128,1,float16,float16,0,0.01876160055398941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,4,128,1,float16,fp8,0,0.018787199258804323
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,12,4,128,1,fp8,fp8,0,0.018745599687099455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,12,128,1,float16,float16,0,0.018675200641155243
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,12,128,1,float16,fp8,0,0.0187376007437706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,12,128,1,fp8,fp8,0,0.01858399957418442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,1,128,1,float16,float16,0,0.018433600664138794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,1,128,1,float16,fp8,0,0.01685120016336441
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,1,128,1,fp8,fp8,0,0.018593600392341612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,2,128,1,float16,fp8,0,0.016697600483894348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,2,128,1,fp8,fp8,0,0.01849759966135025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,4,128,1,float16,float16,0,0.018025599420070648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,4,128,1,float16,fp8,0,0.018534399569034576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,4,128,1,fp8,fp8,0,0.01857919991016388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,12,128,1,float16,float16,0,0.018031999468803406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,12,128,1,float16,fp8,0,0.01664479970932007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,12,128,1,fp8,fp8,0,0.016663999855518342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,1,128,1,float16,float16,0,0.01680160015821457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,1,128,1,float16,fp8,0,0.016531200706958772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,1,128,1,fp8,fp8,0,0.0166703999042511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,2,128,1,float16,float16,0,0.016705599427223206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,2,128,1,float16,fp8,0,0.016708800196647645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,2,128,1,fp8,fp8,0,0.016689600050449373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,4,128,1,float16,float16,0,0.016683200001716615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,4,128,1,fp8,fp8,0,0.01658879965543747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,12,4,128,1,float16,fp8,0,0.016705599427223206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,12,1,128,1,float16,float16,0,0.17411999702453612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,12,1,128,1,float16,fp8,0,0.21370561122894288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,12,2,128,1,fp8,fp8,0,0.028814399242401124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,12,2,128,1,float16,float16,0,0.1759727954864502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,12,2,128,1,float16,fp8,0,0.2132767915725708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,12,2,128,1,fp8,fp8,0,0.21401600837707518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,12,4,128,1,float16,float16,0,0.19431040287017823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,12,4,128,1,float16,fp8,0,0.21483199596405028
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,12,4,128,1,fp8,fp8,0,0.21351680755615235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,12,128,1,float16,fp8,0,0.14765599966049195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,12,128,1,fp8,fp8,0,0.14761919975280763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,12,2,128,1,float16,float16,0,0.016990399360656737
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,1,128,1,float16,fp8,0,0.11356960535049439
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,1,128,1,fp8,fp8,0,0.11430399417877198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,2,128,1,float16,float16,0,0.09551519751548768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,2,128,1,float16,fp8,0,0.11399359703063965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,2,128,1,fp8,fp8,0,0.11419839859008789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,12,1,128,1,fp8,fp8,0,0.21356160640716554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,4,128,1,float16,float16,0,0.10499839782714844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,4,128,1,float16,fp8,0,0.1147312045097351
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,4,128,1,fp8,fp8,0,0.11480799913406373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,12,128,1,float16,float16,0,0.14444479942321778
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,12,128,1,float16,fp8,0,0.08208000063896179
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,12,128,1,fp8,fp8,0,0.08215839862823486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,1,128,1,float16,float16,0,0.055587202310562134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,12,1,128,1,float16,float16,0,0.09466559886932373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,1,128,1,float16,fp8,0,0.06336479783058166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,1,128,1,fp8,fp8,0,0.06262720227241517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,2,128,1,float16,float16,0,0.05684319734573364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,2,128,1,float16,fp8,0,0.06367040276527405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,2,128,1,fp8,fp8,0,0.0634992003440857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,4,128,1,float16,float16,0,0.0605135977268219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,4,128,1,float16,fp8,0,0.06385759711265564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,4,128,1,fp8,fp8,0,0.06369919776916504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,12,128,1,float16,float16,0,0.04524160027503967
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,12,128,1,fp8,fp8,0,0.04533439874649048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,1,128,1,float16,float16,0,0.03295519948005676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,1,128,1,float16,fp8,0,0.03697119951248169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,1,128,1,fp8,fp8,0,0.03698239922523498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,2,128,1,float16,float16,0,0.03293440043926239
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,2,128,1,float16,fp8,0,0.03698399960994721
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,2,128,1,fp8,fp8,0,0.036980798840522765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,4,128,1,float16,float16,0,0.03506560027599335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,4,128,1,float16,fp8,0,0.036985599994659425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,4,128,1,fp8,fp8,0,0.03697440028190613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,12,128,1,float16,float16,0,0.026924800872802735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,12,12,128,1,float16,float16,0,0.08073920011520386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,12,128,1,float16,fp8,0,0.030435198545455934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,12,128,1,fp8,fp8,0,0.030814400315284728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,1,128,1,float16,float16,0,0.024534399807453155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,1,128,1,float16,fp8,0,0.025927999615669252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,1,128,1,fp8,fp8,0,0.026163199543952943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,2,128,1,float16,fp8,0,0.026225599646568298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,2,128,1,fp8,fp8,0,0.026526400446891786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,4,128,1,float16,float16,0,0.02484800070524216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,4,128,1,float16,fp8,0,0.02669279873371124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,4,128,1,fp8,fp8,0,0.02570880055427551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,12,128,1,float16,float16,0,0.01849759966135025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,12,128,1,float16,fp8,0,0.018739199638366698
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,12,128,1,fp8,fp8,0,0.01866399943828583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,1,128,1,float16,fp8,0,0.016752000153064727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,1,128,1,fp8,fp8,0,0.016676799952983858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,2,128,1,float16,float16,0,0.01653759926557541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,2,128,1,float16,fp8,0,0.016681599617004394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,12,12,128,1,float16,fp8,0,0.04539999961853027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,2,128,1,fp8,fp8,0,0.01674560010433197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,4,128,1,float16,float16,0,0.016545599699020384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,4,128,1,float16,fp8,0,0.016651199758052827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,4,128,1,fp8,fp8,0,0.01658719927072525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,12,128,1,float16,float16,0,0.01655679941177368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,12,128,1,float16,fp8,0,0.016521599888801575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,12,128,1,fp8,fp8,0,0.016732800006866454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,1,128,1,float16,float16,0,0.014584000408649444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,1,128,1,float16,fp8,0,0.016527999937534333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,2,128,1,float16,float16,0,0.015315200388431548
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,2,128,1,float16,fp8,0,0.016446399688720702
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,2,128,1,fp8,fp8,0,0.01555359959602356
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,4,128,1,float16,float16,0,0.015030400454998016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,4,128,1,float16,fp8,0,0.015345600247383118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,4,128,1,fp8,fp8,0,0.014897599816322327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,12,128,1,float16,float16,0,0.01650719940662384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,12,128,1,float16,fp8,0,0.016468800604343414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,12,128,1,fp8,fp8,0,0.016475200653076172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,1,128,1,float16,float16,0,0.014668799936771393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,1,128,1,float16,fp8,0,0.014678399264812469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,12,2,128,1,float16,float16,0,0.023089599609375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,1,128,1,fp8,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,2,128,1,float16,float16,0,0.014519999921321868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,2,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,2,128,1,fp8,fp8,0,0.014502400159835815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,4,128,1,float16,float16,0,0.014660799503326416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,4,128,1,float16,fp8,0,0.014595200121402741
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,12,4,128,1,fp8,fp8,0,0.014601600170135499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,12,1,128,1,float16,float16,0,0.01656319946050644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,12,128,1,float16,float16,0,0.01600320041179657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,12,128,1,float16,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,12,128,1,fp8,fp8,0,0.014655999839305878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,1,128,1,float16,float16,0,0.01464959979057312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,1,128,1,float16,fp8,0,0.014507199823856353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,1,128,1,fp8,fp8,0,0.014604799449443817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,2,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,2,128,1,fp8,fp8,0,0.014595200121402741
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,4,128,1,float16,float16,0,0.014692799746990204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,4,128,1,float16,fp8,0,0.014668799936771393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,4,128,1,fp8,fp8,0,0.014593599736690522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,12,1,128,1,float16,float16,0,0.10579839944839478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,12,1,128,1,float16,fp8,0,0.12595200538635254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,12,1,128,1,fp8,fp8,0,0.12547839879989625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,12,2,128,1,float16,float16,0,0.10677119493484497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,12,2,128,1,float16,fp8,0,0.12523679733276366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,12,2,128,1,fp8,fp8,0,0.12615679502487182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,12,4,128,1,float16,float16,0,0.11494560241699218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,12,4,128,1,float16,fp8,0,0.12623039484024048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,12,4,128,1,fp8,fp8,0,0.12527040243148804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,12,128,1,float16,float16,0,0.08114879727363586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,12,128,1,float16,fp8,0,0.08611360192298889
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,12,128,1,fp8,fp8,0,0.08627520203590393
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,1,128,1,float16,float16,0,0.05960639715194702
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,1,128,1,float16,fp8,0,0.06783360242843628
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,1,128,1,fp8,fp8,0,0.06784960031509399
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,2,128,1,float16,float16,0,0.05973119735717773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,2,128,1,float16,fp8,0,0.06779839992523193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,2,128,1,fp8,fp8,0,0.06780959963798523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,4,128,1,float16,float16,0,0.0642848014831543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,4,128,1,float16,fp8,0,0.06784800291061402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,12,4,128,1,fp8,fp8,0,0.06791039705276489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,12,128,1,float16,float16,0,0.04624319970607758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,12,128,1,float16,fp8,0,0.04734080135822296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,12,128,1,fp8,fp8,0,0.04750080108642578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,1,128,1,float16,float16,0,0.034985598921775815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,1,128,1,float16,fp8,0,0.03906719982624054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,1,128,1,fp8,fp8,0,0.03923999965190887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,12,1,128,1,fp8,fp8,0,0.014534400403499603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,2,128,1,float16,float16,0,0.03489440083503723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,2,128,1,float16,fp8,0,0.039108800888061526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,2,128,1,fp8,fp8,0,0.039175999164581296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,4,128,1,float16,float16,0,0.036513599753379825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,4,128,1,float16,fp8,0,0.03922240138053894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,12,4,128,1,fp8,fp8,0,0.03917439877986908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,12,128,1,float16,float16,0,0.024796800315380098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,12,128,1,float16,fp8,0,0.028835201263427736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,12,128,1,fp8,fp8,0,0.028808000683784484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,1,128,1,float16,float16,0,0.02133920043706894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,1,128,1,float16,fp8,0,0.024672000110149382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,1,128,1,fp8,fp8,0,0.024566400051116943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,2,128,1,float16,float16,0,0.022753599286079406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,2,128,1,float16,fp8,0,0.024775999784469604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,2,128,1,fp8,fp8,0,0.02449920028448105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,4,128,1,float16,float16,0,0.022683200240135194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,4,128,1,fp8,fp8,0,0.024758400022983552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,12,4,128,1,float16,fp8,0,0.024820800125598907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,12,128,1,float16,float16,0,0.01860000044107437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,12,128,1,float16,fp8,0,0.020584000647068022
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,1,128,1,float16,float16,0,0.016495999693870545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,12,128,1,fp8,fp8,0,0.01921440064907074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,1,128,1,float16,fp8,0,0.018510399758815764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,1,128,1,fp8,fp8,0,0.016758400201797485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,2,128,1,float16,float16,0,0.01672479957342148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,2,128,1,float16,fp8,0,0.016729600727558136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,2,128,1,fp8,fp8,0,0.01849440038204193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,4,128,1,float16,float16,0,0.016572800278663636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,4,128,1,float16,fp8,0,0.018569600582122803
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,12,2,128,1,float16,float16,0,0.014483200013637542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,12,128,1,float16,float16,0,0.013240000605583191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,12,128,1,float16,fp8,0,0.012606400251388549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,1,128,1,float16,float16,0,0.012408000230789185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,1,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,1,128,1,fp8,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,2,128,1,float16,float16,0,0.012511999905109405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,2,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,2,128,1,fp8,fp8,0,0.01247519999742508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,4,128,1,float16,float16,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,4,128,1,fp8,fp8,0,0.012415999919176102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,12,128,1,float16,float16,0,0.012415999919176102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,12,128,1,float16,fp8,0,0.012443199753761292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,12,128,1,fp8,fp8,0,0.012417600303888322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,1,128,1,float16,float16,0,0.010644800215959548
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,1,128,1,float16,fp8,0,0.011020799726247787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,1,128,1,fp8,fp8,0,0.01101439967751503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,2,128,1,float16,float16,0,0.010639999806880952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,2,128,1,float16,fp8,0,0.011324799805879592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,2,128,1,fp8,fp8,0,0.011539199948310852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,4,128,1,float16,float16,0,0.012479999661445617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,4,128,1,float16,fp8,0,0.012409599870443344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,12,4,128,1,fp8,fp8,0,0.012411200255155564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,12,128,1,float16,float16,0,0.012532800436019897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,12,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,12,128,1,fp8,fp8,0,0.012457600235939026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,1,128,1,float16,float16,0,0.011561600118875503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,1,128,1,float16,fp8,0,0.011030399799346923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,1,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,2,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,2,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,2,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,4,128,1,float16,float16,0,0.012070400267839431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,4,128,1,float16,fp8,0,0.01133280023932457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,12,4,128,1,fp8,fp8,0,0.011561600118875503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,4,128,1,float16,fp8,0,0.012702399492263794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,12,128,1,float16,float16,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,12,128,1,float16,fp8,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,12,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,1,128,1,float16,float16,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,1,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,1,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,2,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,2,128,1,float16,fp8,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,4,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,4,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,12,4,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,12,1,128,1,float16,float16,0,0.08006719946861267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,12,1,128,1,float16,fp8,0,0.08833919763565064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,12,1,128,1,fp8,fp8,0,0.08836320042610168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,12,2,128,1,float16,float16,0,0.08079680204391479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,12,2,128,1,float16,fp8,0,0.08822240233421326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,12,2,128,1,fp8,fp8,0,0.08827040195465088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,12,4,128,1,float16,float16,0,0.08441920280456543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,12,4,128,1,float16,fp8,0,0.08850079774856567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,12,4,128,1,fp8,fp8,0,0.088510400056839
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,12,4,128,1,fp8,fp8,0,0.01703519970178604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,12,128,1,float16,float16,0,0.0554639995098114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,12,128,1,float16,fp8,0,0.05759360194206238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,12,12,128,1,fp8,fp8,0,0.014521600306034088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,12,128,1,fp8,fp8,0,0.0576416015625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,1,128,1,float16,float16,0,0.04347360134124756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,1,128,1,float16,fp8,0,0.04928959906101227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,1,128,1,fp8,fp8,0,0.049307200312614444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,2,128,1,float16,float16,0,0.043881601095199584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,2,128,1,float16,fp8,0,0.049316799640655516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,4,128,1,float16,float16,0,0.04726400077342987
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,4,128,1,float16,fp8,0,0.04936639964580536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,4,128,1,fp8,fp8,0,0.0493151992559433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,12,128,1,float16,float16,0,0.03084160089492798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,12,128,1,float16,fp8,0,0.03298720121383667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,12,128,1,fp8,fp8,0,0.033083200454711914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,1,128,1,float16,fp8,0,0.028892800211906433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,1,128,1,fp8,fp8,0,0.028939199447631837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,2,128,1,float16,float16,0,0.026969599723815917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,2,128,1,float16,fp8,0,0.028988799452781676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,2,128,1,fp8,fp8,0,0.029068800806999206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,4,128,1,float16,float16,0,0.028832000494003297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,4,128,1,float16,fp8,0,0.028977599740028382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,4,128,1,fp8,fp8,0,0.028998398780822755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,12,128,1,float16,float16,0,0.02022880017757416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,12,128,1,float16,fp8,0,0.020803199708461763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,12,128,1,fp8,fp8,0,0.02088959962129593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,1,128,1,float16,float16,0,0.018540799617767334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,1,128,1,float16,fp8,0,0.01881919950246811
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,1,128,1,fp8,fp8,0,0.01870879977941513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,2,128,1,float16,float16,0,0.01860159933567047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,2,128,1,float16,fp8,0,0.018694399297237395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,2,128,1,fp8,fp8,0,0.018801599740982056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,4,128,1,float16,float16,0,0.018787199258804323
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,12,2,128,1,fp8,fp8,0,0.04926080107688904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,4,128,1,float16,fp8,0,0.018863999843597413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,12,4,128,1,fp8,fp8,0,0.018771199882030486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,12,128,1,float16,float16,0,0.014788800477981567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,12,128,1,float16,fp8,0,0.016510400176048278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,12,128,1,fp8,fp8,0,0.016598400473594666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,1,128,1,float16,float16,0,0.014644800126552582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,1,128,1,float16,fp8,0,0.014766399562358857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,12,1,128,1,float16,float16,0,0.026833599805831908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,1,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,2,128,1,float16,float16,0,0.014606399834156037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,2,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,2,128,1,fp8,fp8,0,0.014694400131702423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,4,128,1,float16,fp8,0,0.014535999298095703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,4,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,12,128,1,float16,float16,0,0.010655999928712846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,12,128,1,float16,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,1,128,1,float16,float16,0,0.011132799834012986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,1,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,1,128,1,fp8,fp8,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,2,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,2,128,1,fp8,fp8,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,2,128,1,float16,fp8,0,0.01069760024547577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,4,128,1,float16,float16,0,0.010622400045394897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,4,128,1,float16,fp8,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,4,128,1,fp8,fp8,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,12,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,12,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,1,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,1,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,1,128,1,fp8,fp8,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,2,128,1,float16,float16,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,2,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,2,128,1,fp8,fp8,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,4,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,4,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,4,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,12,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,12,128,1,float16,fp8,0,0.010278400033712387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,12,128,1,fp8,fp8,0,0.010315199941396713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,1,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,12,4,128,1,float16,float16,0,0.014659200608730317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,1,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,1,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,2,128,1,float16,float16,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,2,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,12,12,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,4,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,4,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,12,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,12,4,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,12,128,1,float16,fp8,0,0.01034879982471466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,12,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,1,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,1,128,1,float16,fp8,0,0.010331200063228607
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,1,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,2,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,2,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,4,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,4,128,1,float16,fp8,0,0.010335999727249145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,4,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,12,1,128,1,float16,float16,0,0.06775839924812317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,12,1,128,1,float16,fp8,0,0.06988000273704528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,12,12,128,1,float16,float16,0,0.010734400153160096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,12,1,128,1,fp8,fp8,0,0.06988000273704528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,12,2,128,1,float16,float16,0,0.06740639805793762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,12,2,128,1,float16,fp8,0,0.06999520063400269
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,12,2,128,1,fp8,fp8,0,0.0699567973613739
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,12,4,128,1,float16,fp8,0,0.0699072003364563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,12,4,128,1,fp8,fp8,0,0.06979039907455445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,12,128,1,float16,float16,0,0.04118399918079376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,12,128,1,float16,fp8,0,0.04317600131034851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,12,128,1,fp8,fp8,0,0.043171200156211856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,1,128,1,float16,float16,0,0.037283200025558474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,1,128,1,float16,fp8,0,0.03912000060081482
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,1,128,1,fp8,fp8,0,0.03912160098552704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,2,128,1,float16,float16,0,0.03757759928703308
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,2,128,1,float16,fp8,0,0.03913280069828033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,2,128,1,fp8,fp8,0,0.039164799451828006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,4,128,1,float16,float16,0,0.039182400703430174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,12,2,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,4,128,1,fp8,fp8,0,0.03914560079574585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,12,128,1,float16,float16,0,0.02476799935102463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,12,128,1,float16,fp8,0,0.02678079903125763
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,12,128,1,fp8,fp8,0,0.026793599128723145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,1,128,1,float16,float16,0,0.024694399535655977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,1,128,1,float16,fp8,0,0.02476000040769577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,12,4,128,1,float16,float16,0,0.06988800168037415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,1,128,1,fp8,fp8,0,0.024639999866485594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,2,128,1,float16,float16,0,0.024697600305080412
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,2,128,1,fp8,fp8,0,0.024798400700092316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,4,128,1,float16,float16,0,0.024771200120449068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,4,128,1,float16,fp8,0,0.024809600412845613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,4,128,1,fp8,fp8,0,0.02481119930744171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,12,128,1,float16,float16,0,0.016752000153064727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,12,128,1,float16,fp8,0,0.016678400337696075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,12,128,1,fp8,fp8,0,0.016740800440311433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,1,128,1,float16,float16,0,0.016545599699020384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,1,128,1,float16,fp8,0,0.016638399660587312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,1,128,1,fp8,fp8,0,0.016676799952983858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,2,128,1,float16,float16,0,0.016657599806785585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,2,128,1,float16,fp8,0,0.01658399999141693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,2,128,1,fp8,fp8,0,0.016531200706958772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,4,128,1,float16,float16,0,0.016916799545288085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,12,4,128,1,float16,fp8,0,0.039105600118637084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,4,128,1,fp8,fp8,0,0.016531200706958772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,12,128,1,float16,fp8,0,0.014483200013637542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,12,128,1,fp8,fp8,0,0.014388799667358398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,1,128,1,float16,float16,0,0.012532800436019897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,1,128,1,float16,fp8,0,0.012624000012874604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,1,128,1,fp8,fp8,0,0.012547199428081513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,2,128,1,float16,float16,0,0.013436800241470337
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,2,128,1,float16,fp8,0,0.012617599964141846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,12,2,128,1,float16,fp8,0,0.024823999404907225
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,4,128,1,float16,float16,0,0.012492799758911132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,4,128,1,float16,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,4,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,12,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,12,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,12,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,1,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,1,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,1,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,2,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,2,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,2,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,4,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,4,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,12,4,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,12,128,1,float16,float16,0,0.010364799946546554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,12,128,1,float16,fp8,0,0.01003199964761734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,12,128,1,float16,float16,0,0.014451199769973755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,12,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,1,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,1,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,1,128,1,fp8,fp8,0,0.008726400136947633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,2,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,2,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,4,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,4,128,1,float16,fp8,0,0.008937600255012512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,4,128,1,fp8,fp8,0,0.009160000085830688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,12,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,12,2,128,1,fp8,fp8,0,0.012625600397586822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,12,128,1,float16,fp8,0,0.00843520015478134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,12,128,1,fp8,fp8,0,0.010326399654150008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,1,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,1,128,1,fp8,fp8,0,0.008463999629020691
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,1,128,1,float16,fp8,0,0.010248000174760819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,2,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,2,128,1,float16,fp8,0,0.01027040034532547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,2,128,1,fp8,fp8,0,0.010332799702882766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,4,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,4,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,12,4,128,1,fp8,fp8,0,0.010292799770832061
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,12,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,12,128,1,float16,fp8,0,0.010273600369691849
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,12,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,1,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,1,128,1,float16,fp8,0,0.00950080007314682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,1,128,1,fp8,fp8,0,0.010300800204277039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,2,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,2,128,1,float16,fp8,0,0.01032319962978363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,2,128,1,fp8,fp8,0,0.00957920029759407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,4,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,4,128,1,float16,fp8,0,0.010684800148010255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,12,4,128,1,fp8,fp8,0,0.010310400277376175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,12,1,128,1,float16,float16,0,0.06385759711265564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,12,1,128,1,float16,fp8,0,0.06174719929695129
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,12,4,128,1,float16,fp8,0,0.01658080071210861
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,12,1,128,1,fp8,fp8,0,0.061692798137664796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,12,2,128,1,float16,float16,0,0.06371999979019165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,12,2,128,1,float16,fp8,0,0.06175680160522461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,12,2,128,1,fp8,fp8,0,0.061742401123046874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,12,4,128,1,float16,float16,0,0.06549280285835266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,12,4,128,1,float16,fp8,0,0.061768001317977904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,12,2,128,1,float16,fp8,0,0.009086400270462036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,12,4,128,1,fp8,fp8,0,0.06178240180015564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,12,128,1,float16,float16,0,0.03714239895343781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,12,128,1,float16,fp8,0,0.037067198753356935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,12,128,1,fp8,fp8,0,0.038812801241874695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,1,128,1,float16,float16,0,0.03705599904060364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,1,128,1,float16,fp8,0,0.03510879874229431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,1,128,1,fp8,fp8,0,0.03708159923553467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,2,128,1,float16,float16,0,0.037006399035453795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,2,128,1,float16,fp8,0,0.035041600465774536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,2,128,1,fp8,fp8,0,0.03699040114879608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,4,128,1,float16,float16,0,0.03707680106163025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,4,128,1,float16,fp8,0,0.03515999913215637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,12,4,128,1,fp8,fp8,0,0.03702560067176819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,12,128,1,float16,fp8,0,0.023112000524997713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,12,128,1,fp8,fp8,0,0.02465600073337555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,1,128,1,float16,float16,0,0.022742399573326112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,1,128,1,float16,fp8,0,0.022732800245285033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,1,128,1,fp8,fp8,0,0.02271360009908676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,2,128,1,float16,float16,0,0.022843199968338012
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,2,128,1,float16,fp8,0,0.022742399573326112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,2,128,1,fp8,fp8,0,0.022668799757957457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,4,128,1,float16,float16,0,0.022758400440216063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,4,128,1,float16,fp8,0,0.022742399573326112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,4,128,1,fp8,fp8,0,0.02266400009393692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,12,128,1,float16,float16,0,0.01655679941177368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,12,128,1,float16,fp8,0,0.016468800604343414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,1,128,1,float16,float16,0,0.01451520025730133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,12,128,1,fp8,fp8,0,0.017319999635219574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,1,128,1,float16,fp8,0,0.014523200690746307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,1,128,1,fp8,fp8,0,0.014567999541759491
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,2,128,1,float16,float16,0,0.015539200603961944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,2,128,1,float16,fp8,0,0.014569599926471711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,2,128,1,fp8,fp8,0,0.014564800262451171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,4,128,1,float16,float16,0,0.016422399878501893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,4,128,1,fp8,fp8,0,0.01653439998626709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,12,128,1,float16,float16,0,0.013956800103187561
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,12,128,1,float16,fp8,0,0.012608000636100769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,12,128,1,fp8,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,1,128,1,float16,float16,0,0.012572799623012543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,1,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,1,128,1,fp8,fp8,0,0.012587200105190276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,2,128,1,float16,float16,0,0.012654399871826172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,2,128,1,float16,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,2,128,1,fp8,fp8,0,0.012459199875593185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,4,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,4,128,1,float16,fp8,0,0.012566399574279786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,12,12,128,1,float16,float16,0,0.024740800261497498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,12,4,128,1,fp8,fp8,0,0.012614400684833526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,12,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,12,128,1,float16,fp8,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,12,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,1,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,1,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,2,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,2,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,2,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,4,128,1,float16,float16,0,0.010329599678516387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,4,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,12,4,128,1,fp8,fp8,0,0.010331200063228607
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,12,128,1,float16,fp8,0,0.00846880003809929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,12,128,1,float16,float16,0,0.010315199941396713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,1,128,1,float16,float16,0,0.009585600346326828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,1,128,1,float16,fp8,0,0.010182400047779084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,1,128,1,fp8,fp8,0,0.008395200222730636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,2,128,1,float16,float16,0,0.010311999917030334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,2,128,1,float16,fp8,0,0.008420799672603608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,2,128,1,fp8,fp8,0,0.010351999849081039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,4,128,1,float16,float16,0,0.009635200351476669
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,4,128,1,float16,fp8,0,0.010345599800348281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,4,128,1,fp8,fp8,0,0.008345600217580795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,12,128,1,float16,float16,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,12,128,1,float16,fp8,0,0.008454400300979614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,12,128,1,fp8,fp8,0,0.010291200131177902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,12,4,128,1,float16,fp8,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,1,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,1,128,1,float16,fp8,0,0.010291200131177902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,1,128,1,fp8,fp8,0,0.008417599648237229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,2,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,2,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,2,128,1,fp8,fp8,0,0.008577600121498108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,4,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,4,128,1,float16,fp8,0,0.010335999727249145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,12,4,128,1,fp8,fp8,0,0.008908800035715102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,12,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,12,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,12,128,1,fp8,fp8,0,0.009120000153779983
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,1,128,1,float16,float16,0,0.008486399799585343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,1,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,1,128,1,fp8,fp8,0,0.00835679993033409
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,2,128,1,float16,float16,0,0.010283199697732925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,2,128,1,float16,fp8,0,0.008363199979066848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,2,128,1,fp8,fp8,0,0.008551999926567078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,4,128,1,float16,float16,0,0.0083856001496315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,4,128,1,float16,fp8,0,0.008534400165081025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,12,4,128,1,fp8,fp8,0,0.008388800173997879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,12,1,128,1,float16,fp8,0,0.057739198207855225
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,12,1,128,1,fp8,fp8,0,0.058143997192382814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,12,2,128,1,float16,float16,0,0.06348159909248352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,12,2,128,1,float16,fp8,0,0.05771840214729309
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,12,2,128,1,fp8,fp8,0,0.05797280073165893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,12,4,128,1,float16,float16,0,0.06362720131874085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,12,4,128,1,float16,fp8,0,0.05772320032119751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,12,12,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,12,4,128,1,fp8,fp8,0,0.05752800107002258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,0,0.036769598722457886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,0,0.03306879997253418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,12,128,1,fp8,fp8,0,0.0347791999578476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,1,128,1,float16,float16,0,0.035076799988746646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,1,128,1,float16,fp8,0,0.03420639932155609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,1,128,1,fp8,fp8,0,0.032979199290275575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,2,128,1,float16,float16,0,0.035046398639678955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,2,128,1,float16,fp8,0,0.03294239938259125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,2,128,1,fp8,fp8,0,0.03495680093765259
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,4,128,1,float16,float16,0,0.03503359854221344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,4,128,1,float16,fp8,0,0.03495039939880371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,12,4,128,1,fp8,fp8,0,0.032953599095344545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,0,0.02282239943742752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,0,0.020659199357032774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,12,128,1,fp8,fp8,0,0.022628800570964815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,1,128,1,float16,float16,0,0.02274879962205887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,1,128,1,float16,fp8,0,0.022785599529743194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,2,128,1,float16,float16,0,0.022776000201702118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,2,128,1,float16,fp8,0,0.022651199996471406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,2,128,1,fp8,fp8,0,0.02269120067358017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,12,1,128,1,float16,float16,0,0.062224000692367554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,4,128,1,float16,fp8,0,0.02146719992160797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,4,128,1,fp8,fp8,0,0.022679999470710754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,0,0.0165120005607605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,0,0.014596800506114959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,12,128,1,fp8,fp8,0,0.014596800506114959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,1,128,1,float16,float16,0,0.016641600430011748
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,1,128,1,float16,fp8,0,0.014638400077819825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,1,128,1,fp8,fp8,0,0.014672000706195832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,2,128,1,float16,float16,0,0.01650400012731552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,2,128,1,float16,fp8,0,0.014636799693107605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,2,128,1,fp8,fp8,0,0.014692799746990204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,4,128,1,float16,float16,0,0.016495999693870545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,4,128,1,float16,fp8,0,0.014632000029087067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,12,4,128,1,fp8,fp8,0,0.014537599682807923
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,0,0.01249919980764389
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,12,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,1,128,1,float16,float16,0,0.012457600235939026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,1,128,1,float16,fp8,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,1,128,1,fp8,fp8,0,0.01242239996790886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,1,128,1,fp8,fp8,0,0.02105119973421097
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,2,128,1,float16,float16,0,0.012417600303888322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,2,128,1,float16,fp8,0,0.012465599924325943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,4,128,1,float16,float16,0,0.012492799758911132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,4,128,1,float16,fp8,0,0.0124208003282547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,12,4,128,1,float16,float16,0,0.02282720059156418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,4,128,1,fp8,fp8,0,0.012403199821710587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,12,128,1,fp8,fp8,0,0.010344000160694122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,1,128,1,float16,float16,0,0.010527999699115753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,1,128,1,float16,fp8,0,0.01029760017991066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,1,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,2,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,2,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,4,128,1,float16,float16,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,4,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,4,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,0,0.010382399708032609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,0,0.010337600111961364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,12,128,1,fp8,fp8,0,0.010332799702882766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,0,0.01324480026960373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,1,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,1,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,2,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,2,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,2,128,1,fp8,fp8,0,0.010351999849081039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,4,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,4,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,12,2,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,4,128,1,fp8,fp8,0,0.010286399722099304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,0,0.00905120000243187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,12,128,1,fp8,fp8,0,0.008767999708652496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,1,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,1,128,1,float16,fp8,0,0.008491200208663941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,1,128,1,fp8,fp8,0,0.008603200316429138
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,2,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,2,128,1,float16,fp8,0,0.009145600348711013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,2,128,1,fp8,fp8,0,0.008531200140714646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,4,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,4,128,1,float16,fp8,0,0.008641599863767623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,12,4,128,1,fp8,fp8,0,0.008449599891901017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,0,0.008425600081682205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,12,128,1,fp8,fp8,0,0.008686400204896926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,1,128,1,float16,float16,0,0.008462399989366532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,1,128,1,float16,fp8,0,0.008479999750852585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,1,128,1,fp8,fp8,0,0.008376000076532364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,2,128,1,float16,float16,0,0.010351999849081039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,2,128,1,float16,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,2,128,1,fp8,fp8,0,0.010340800136327743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,4,128,1,float16,float16,0,0.00843520015478134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,4,128,1,fp8,fp8,0,0.00846719965338707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,12,4,128,1,float16,fp8,0,0.010318399965763092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,12,1,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,fp8,0,2.0583295822143555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,float16,0,2.4485488891601563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,8,1,128,1,fp8,fp8,0,2.0690303802490235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,float16,0,2.4103456497192384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,fp8,0,2.245992088317871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,8,2,128,1,fp8,fp8,0,2.0798112869262697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,float16,0,1.3755871772766113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,fp8,0,2.070729637145996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,float16,0,2.641003227233887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,8,4,128,1,fp8,fp8,0,2.0662336349487305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,fp8,0,1.141220760345459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,8,128,1,fp8,fp8,0,1.086308765411377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,float16,0,1.2690303802490235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,fp8,0,1.0746031761169434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,1,128,1,fp8,fp8,0,1.088584041595459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,float16,0,1.2547951698303224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,fp8,0,1.113094425201416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,float16,0,1.3368576049804688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,fp8,0,1.1745663642883302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,4,128,1,fp8,fp8,0,1.0845775604248047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,fp8,0,0.6537024021148682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,float16,0,0.7557775974273682
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,8,128,1,fp8,fp8,0,0.5890575885772705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,fp8,0,0.6550559997558594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,1,128,1,fp8,fp8,0,0.5806352138519287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,fp8,0,0.5822319984436035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,float16,0,0.7312416076660156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,8,2,128,1,fp8,fp8,0,1.083243179321289
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,2,128,1,fp8,fp8,0,0.5812960147857666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,float16,0,0.7216400146484375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,fp8,0,0.5837520122528076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,4,128,1,fp8,fp8,0,0.5833392143249512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,float16,0,0.4260735988616943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,fp8,0,0.33656480312347414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,8,128,1,fp8,fp8,0,0.33777120113372805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,float16,0,0.39319040775299074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,fp8,0,0.3336992025375366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,1,128,1,fp8,fp8,0,0.3329119920730591
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,float16,0,0.38950400352478026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,fp8,0,0.3335423946380615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,2,128,1,fp8,fp8,0,0.333951997756958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,float16,0,0.6645232200622558
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,float16,0,0.39825599193572997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,4,128,1,fp8,fp8,0,0.3327120065689087
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,fp8,0,1.239508819580078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,float16,0,1.4021984100341798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,8,1,128,1,fp8,fp8,0,1.2339344024658203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,fp8,0,1.267728042602539
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,float16,0,1.4724415779113769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,fp8,0,0.3339760065078735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,fp8,0,1.3320783615112304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,float16,0,1.4589648246765137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,8,4,128,1,fp8,fp8,0,1.239094352722168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,fp8,0,0.6609263896942139
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,float16,0,0.8256752014160156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,8,128,1,fp8,fp8,0,0.6601920127868652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,float16,0,0.7463295936584473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,fp8,0,0.6556623935699463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,1,128,1,fp8,fp8,0,0.651475191116333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,8,2,128,1,fp8,fp8,0,1.2371279716491699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,float16,0,0.7495759963989258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,2,128,1,fp8,fp8,0,0.6524975776672364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,float16,0,0.763756799697876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,fp8,0,0.7075407981872559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,float16,0,0.44078240394592283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,4,128,1,fp8,fp8,0,0.6552480220794678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,8,128,1,fp8,fp8,0,0.36695840358734133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,float16,0,0.4077119827270508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,fp8,0,0.35933599472045896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,1,128,1,fp8,fp8,0,0.3597743988037109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,float16,0,0.40526881217956545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,fp8,0,0.35928959846496583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,2,128,1,fp8,fp8,0,0.36011199951171874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,float16,0,0.4257984161376953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,fp8,0,0.36112959384918214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,fp8,0,0.6535632133483886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,4,128,1,fp8,fp8,0,0.3608880043029785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,float16,0,0.2622672080993652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,fp8,0,0.21404318809509276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,8,128,1,fp8,fp8,0,0.21365599632263182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,float16,0,0.2375119924545288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,fp8,0,0.21417601108551027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,1,128,1,fp8,fp8,0,0.2134320020675659
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,float16,0,0.2370527982711792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,fp8,0,0.21346719264984132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,2,128,1,fp8,fp8,0,0.21308319568634032
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,float16,0,0.24085440635681152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,fp8,0,0.39483840465545655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,fp8,0,0.21325600147247314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,8,4,128,1,fp8,fp8,0,0.21332321166992188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,float16,0,1.0055423736572267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,8,1,128,1,fp8,fp8,0,0.8986783981323242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,float16,0,1.0181535720825194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,fp8,0,0.90164155960083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,8,2,128,1,fp8,fp8,0,0.927996826171875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,float16,0,1.050939178466797
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,fp8,0,0.9037376403808594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,8,4,128,1,fp8,fp8,0,0.9034976005554199
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,fp8,0,0.9008607864379883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,float16,0,0.5898255825042724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,fp8,0,0.48677759170532225
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,8,128,1,fp8,fp8,0,0.48772802352905276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,float16,0,0.5333360195159912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,fp8,0,0.47964158058166506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,1,128,1,fp8,fp8,0,0.4799168109893799
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,fp8,0,0.4803167819976807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,2,128,1,fp8,fp8,0,0.479856014251709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,fp8,0,0.4831711769104004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,float16,0,0.5781871795654296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,4,128,1,fp8,fp8,0,0.48224477767944335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,float16,0,0.3328223943710327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,8,128,1,fp8,fp8,0,0.2741856098175049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,float16,0,0.29699039459228516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,fp8,0,0.267903995513916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,1,128,1,fp8,fp8,0,0.26981439590454104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,float16,0,0.3010656118392944
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,fp8,0,0.2679248094558716
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,2,128,1,fp8,fp8,0,0.2682352066040039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,float16,0,0.31119840145111083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,fp8,0,0.26857600212097166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,4,128,1,fp8,fp8,0,0.26825599670410155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,float16,0,0.1993648052215576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,fp8,0,0.1660416007041931
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,8,128,1,fp8,fp8,0,0.16620479822158812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,float16,0,0.18076640367507935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,float16,0,0.5407360076904297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,fp8,0,0.1661296010017395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,1,128,1,fp8,fp8,0,0.1662160038948059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,float16,0,0.18071520328521729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,fp8,0,0.16637760400772095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,2,128,1,fp8,fp8,0,0.16625599861145018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,float16,0,0.18409279584884644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,fp8,0,0.1661839962005615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,8,4,128,1,fp8,fp8,0,0.16616640090942383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,fp8,0,0.2751104116439819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,fp8,0,1.1819791793823242
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,float16,0,1.2934736251831054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,8,1,128,1,fp8,fp8,0,1.1803759574890136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,float16,0,1.3631088256835937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,fp8,0,1.1851887702941895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,8,2,128,1,fp8,fp8,0,1.1912079811096192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,float16,0,1.3653823852539062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,fp8,0,1.2959712028503418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,float16,0,0.7602719783782959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,fp8,0,0.6243631839752197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,8,4,128,1,fp8,fp8,0,1.1933856010437012
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,8,128,1,fp8,fp8,0,0.6992224216461181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,float16,0,0.6749120235443116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,fp8,0,0.6585584163665772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,1,128,1,fp8,fp8,0,0.6149903774261475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,float16,0,0.7027535915374756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,fp8,0,0.6362527847290039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,2,128,1,fp8,fp8,0,0.6344272136688233
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,float16,0,0.7147696018218994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,fp8,0,0.6230256080627441
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,float16,0,0.41117281913757325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,8,4,128,1,fp8,fp8,0,0.643232011795044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,fp8,0,0.34155199527740476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,8,128,1,fp8,fp8,0,0.34197120666503905
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,float16,0,0.3630959987640381
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,fp8,0,0.3321216106414795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,1,128,1,fp8,fp8,0,0.3340464115142822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,float16,0,0.3771039962768555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,fp8,0,0.33592801094055175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,2,128,1,fp8,fp8,0,0.3337408065795898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,float16,0,0.38812639713287356
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,fp8,0,0.33604159355163576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,8,4,128,1,fp8,fp8,0,0.3350640058517456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,float16,0,0.23561439514160157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,fp8,0,0.19475200176239013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,8,128,1,fp8,fp8,0,0.1951248049736023
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,float16,0,0.20417919158935546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,fp8,0,0.19063199758529664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,1,128,1,fp8,fp8,0,0.19061280488967897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,float16,0,0.211411190032959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,fp8,0,0.19086400270462037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,2,128,1,fp8,fp8,0,0.1906831979751587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,float16,0,0.21807520389556884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,fp8,0,0.1914463996887207
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,8,4,128,1,fp8,fp8,0,0.19083360433578492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,float16,0,0.13816800117492675
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,fp8,0,0.12109600305557251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,float16,0,0.12946560382843017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,fp8,0,0.12111200094223022
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,1,128,1,fp8,fp8,0,0.12108960151672363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,float16,0,0.1294095993041992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,fp8,0,0.12100800275802612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,2,128,1,fp8,fp8,0,0.12108000516891479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,float16,0,0.13127520084381103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,fp8,0,0.12111680507659912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,4,128,1,fp8,fp8,0,0.12109600305557251
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,float16,0,0.7733407974243164
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,8,1,128,1,fp8,fp8,0,0.7349679946899415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,float16,0,0.7810416221618652
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,8,8,128,1,fp8,fp8,0,0.12262239456176757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,fp8,0,0.7397552013397217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,8,2,128,1,fp8,fp8,0,0.7325263977050781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,float16,0,0.8244480133056641
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,fp8,0,0.727235221862793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,fp8,0,0.7356080055236817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,fp8,0,0.39335520267486573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,8,4,128,1,fp8,fp8,0,0.7331759929656982
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,8,128,1,fp8,fp8,0,0.3934432029724121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,float16,0,0.4095327854156494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,fp8,0,0.3861151933670044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,1,128,1,fp8,fp8,0,0.3916208028793335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,float16,0,0.41539998054504396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,fp8,0,0.3868720054626465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,2,128,1,fp8,fp8,0,0.38464319705963135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,float16,0,0.4356192111968994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,fp8,0,0.3893104076385498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,4,128,1,fp8,fp8,0,0.3877295970916748
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,float16,0,0.262827205657959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,fp8,0,0.2183471918106079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,8,128,1,fp8,fp8,0,0.21752960681915284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,float16,0,0.22589120864868165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,fp8,0,0.2122704029083252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,1,128,1,fp8,fp8,0,0.2117072105407715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,float16,0,0.22792959213256836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,fp8,0,0.2132175922393799
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,2,128,1,fp8,fp8,0,0.2132496118545532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,float16,0,0.24036478996276855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,fp8,0,0.21409759521484376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,8,4,128,1,fp8,fp8,0,0.21316640377044677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,float16,0,0.1549023985862732
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,fp8,0,0.12700159549713136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,float16,0,0.4737855911254883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,8,128,1,fp8,fp8,0,0.127128005027771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,float16,0,0.13082079887390136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,fp8,0,0.12519359588623047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,1,128,1,fp8,fp8,0,0.12487200498580933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,float16,0,0.13005599975585938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,2,128,1,fp8,fp8,0,0.1252527952194214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,fp8,0,0.12513760328292847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,4,128,1,fp8,fp8,0,0.12508800029754638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,float16,0,0.09039679765701295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,fp8,0,0.08206400275230408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,8,128,1,fp8,fp8,0,0.08200799822807311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,float16,0,0.08623520135879517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,fp8,0,0.08208640217781067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,1,128,1,fp8,fp8,0,0.08210880160331727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,float16,0,0.08622239828109741
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,fp8,0,0.08206719756126404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,2,128,1,fp8,fp8,0,0.08037440180778503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,float16,0,0.08827360272407532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,fp8,0,0.08176000118255615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,8,4,128,1,fp8,fp8,0,0.08216320276260376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,float16,0,0.7398928165435791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,fp8,0,0.7331488132476807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,8,1,128,1,fp8,fp8,0,0.7299071788787842
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,float16,0,0.7689008235931396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,fp8,0,0.1249343991279602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,float16,0,0.13357919454574585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,fp8,0,0.7323999881744385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,8,2,128,1,fp8,fp8,0,0.7359055995941162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,float16,0,0.8066864013671875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,fp8,0,0.7357007980346679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,float16,0,0.47899680137634276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,8,4,128,1,fp8,fp8,0,0.73645920753479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,fp8,0,0.39089601039886473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,8,128,1,fp8,fp8,0,0.3904416084289551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,float16,0,0.3950016021728516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,fp8,0,0.3808016061782837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,float16,0,0.3884927988052368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,fp8,0,0.38310720920562746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,2,128,1,fp8,fp8,0,0.3816864013671875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,float16,0,0.41487679481506345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,fp8,0,0.38626561164855955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,4,128,1,fp8,fp8,0,0.3842463970184326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,float16,0,0.25322399139404295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,fp8,0,0.21268959045410157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,8,128,1,fp8,fp8,0,0.21158080101013182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,float16,0,0.2080080032348633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,fp8,0,0.2056960105895996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,1,128,1,fp8,fp8,0,0.205132794380188
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,float16,0,0.2097327947616577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,fp8,0,0.2069727897644043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,2,128,1,fp8,fp8,0,0.20616159439086915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,float16,0,0.22600479125976564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,fp8,0,0.2074575901031494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,8,4,128,1,fp8,fp8,0,0.2071295976638794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,float16,0,0.14423680305480957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,fp8,0,0.1209663987159729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,8,128,1,fp8,fp8,0,0.12080639600753784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,float16,0,0.11697759628295898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,fp8,0,0.11711039543151855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,1,128,1,fp8,fp8,0,0.1169424057006836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,float16,0,0.11982560157775879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,fp8,0,0.11699999570846557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,2,128,1,fp8,fp8,0,0.11728960275650024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,float16,0,0.1263967990875244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,fp8,0,0.11787199974060059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,8,4,128,1,fp8,fp8,0,0.11737120151519775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,float16,0,0.08567360043525696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,fp8,0,0.07555519938468933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,8,128,1,fp8,fp8,0,0.07561439871788025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,float16,0,0.07608000040054322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,8,1,128,1,fp8,fp8,0,0.381441593170166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,fp8,0,0.07544159889221191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,1,128,1,fp8,fp8,0,0.07396640181541443
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,float16,0,0.07596160173416137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,fp8,0,0.07509120106697083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,float16,0,0.07802879810333252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,fp8,0,0.07588319778442383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,4,128,1,fp8,fp8,0,0.07407839894294739
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,float16,0,0.053416001796722415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,fp8,0,0.04734080135822296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,8,128,1,fp8,fp8,0,0.04734080135822296
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,float16,0,0.0493151992559433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,fp8,0,0.047366398572921756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,1,128,1,fp8,fp8,0,0.047244799137115476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,fp8,0,0.0472815990447998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,float16,0,0.04943839907646179
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,2,128,1,fp8,fp8,0,0.04729920029640198
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,fp8,0,0.04731679856777191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,4,128,1,fp8,fp8,0,0.04735040068626404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,float16,0,0.4566239833831787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,fp8,0,0.4679759979248047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,8,1,128,1,fp8,fp8,0,0.46793599128723146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,8,2,128,1,fp8,fp8,0,0.07485120296478272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,float16,0,0.4630943775177002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,fp8,0,0.4692671775817871
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,8,2,128,1,fp8,fp8,0,0.4696352005004883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,float16,0,0.05063679814338684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,float16,0,0.5009759902954102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,fp8,0,0.471998405456543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,fp8,0,0.2529680013656616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,float16,0,0.3004175901412964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,8,4,128,1,fp8,fp8,0,0.4733712196350098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,8,128,1,fp8,fp8,0,0.253057599067688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,float16,0,0.23973441123962402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,fp8,0,0.24582560062408448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,1,128,1,fp8,fp8,0,0.24760160446166993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,float16,0,0.24619679450988768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,fp8,0,0.24728000164031982
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,2,128,1,fp8,fp8,0,0.24720799922943115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,float16,0,0.2683919906616211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,fp8,0,0.2495584011077881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,8,4,128,1,fp8,fp8,0,0.24919838905334474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,fp8,0,0.14060800075531005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,8,128,1,fp8,fp8,0,0.13950560092926026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,float16,0,0.1329103946685791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,fp8,0,0.13559039831161498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,1,128,1,fp8,fp8,0,0.13532480001449584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,float16,0,0.13426879644393921
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,fp8,0,0.13581759929656984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,2,128,1,fp8,fp8,0,0.13551039695739747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,float16,0,0.14764800071716308
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,fp8,0,0.1375264048576355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,4,128,1,fp8,fp8,0,0.1371840000152588
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,float16,0,0.09667840003967285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,fp8,0,0.08021280169487
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,8,128,1,fp8,fp8,0,0.08013759851455689
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,float16,0,0.0780672013759613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,fp8,0,0.08006399869918823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,1,128,1,fp8,fp8,0,0.07912480235099792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,float16,0,0.07799839973449707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,fp8,0,0.07995839715003968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,2,128,1,fp8,fp8,0,0.07979519963264466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,float16,0,0.08219040036201478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,fp8,0,0.08000800013542175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,float16,0,0.05752639770507813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,8,4,128,1,fp8,fp8,0,0.07938560247421264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,fp8,0,0.05156959891319275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,8,128,1,fp8,fp8,0,0.05146399736404419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,float16,0,0.05178880095481873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,fp8,0,0.05153759717941284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,1,128,1,fp8,fp8,0,0.05143679976463318
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,float16,0,0.05187039971351624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,fp8,0,0.05152159929275513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,2,128,1,fp8,fp8,0,0.051523202657699586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,float16,0,0.05343520045280457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,fp8,0,0.05148800015449524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,8,4,128,1,fp8,fp8,0,0.05151039958000183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,float16,0,0.04118399918079376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,fp8,0,0.038971200585365295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,8,128,1,fp8,fp8,0,0.03910079896450043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,float16,0,0.03910079896450043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,fp8,0,0.03922240138053894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,1,128,1,fp8,fp8,0,0.03892799913883209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,float16,0,0.0391072005033493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,fp8,0,0.03911679983139038
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,float16,0,0.039099198579788205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,fp8,0,0.03901439905166626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,2,128,1,fp8,fp8,0,0.039113599061965945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,8,4,128,1,fp8,fp8,0,0.038913598656654357
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,float16,0,0.4640079975128174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,float16,0,0.16497440338134767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,fp8,0,0.500324821472168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,8,1,128,1,fp8,fp8,0,0.501145601272583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,float16,0,0.46483521461486815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,fp8,0,0.5021791934967041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,8,2,128,1,fp8,fp8,0,0.502294397354126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,float16,0,0.5146080017089844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,fp8,0,0.5042031764984131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,float16,0,0.3125616073608398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,fp8,0,0.26634399890899657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,8,4,128,1,fp8,fp8,0,0.5049871921539306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,8,128,1,fp8,fp8,0,0.26666080951690674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,fp8,0,0.26047520637512206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,1,128,1,fp8,fp8,0,0.2601952075958252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,float16,0,0.24780960083007814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,fp8,0,0.2604671955108643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,2,128,1,fp8,fp8,0,0.26091680526733396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,float16,0,0.2683295965194702
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,fp8,0,0.26302878856658934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,4,128,1,fp8,fp8,0,0.262281608581543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,float16,0,0.16999200582504273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,fp8,0,0.14352480173110962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,8,128,1,fp8,fp8,0,0.14523520469665527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,float16,0,0.12897440195083618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,fp8,0,0.14012960195541382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,1,128,1,fp8,fp8,0,0.1393407940864563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,float16,0,0.1348688006401062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,fp8,0,0.13971840143203734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,2,128,1,fp8,fp8,0,0.14099680185317992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,float16,0,0.14620319604873658
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,fp8,0,0.1422927975654602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,8,4,128,1,fp8,fp8,0,0.14114880561828613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,float16,0,0.09836480021476746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,fp8,0,0.08074560165405273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,float16,0,0.07396159768104553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,fp8,0,0.07863199710845947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,1,128,1,fp8,fp8,0,0.07891839742660522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,float16,0,0.07399839758872986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,2,128,1,fp8,fp8,0,0.07899199724197388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,float16,0,0.08346880078315735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,fp8,0,0.07908160090446473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,4,128,1,fp8,fp8,0,0.0790063977241516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,float16,0,0.05636320114135742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,fp8,0,0.04948799908161163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,8,128,1,fp8,fp8,0,0.04938560128211975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,float16,0,0.04766559898853302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,fp8,0,0.04936800003051758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,1,128,1,fp8,fp8,0,0.04943200051784515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,float16,0,0.04778720140457153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,fp8,0,0.049497601389884946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,2,128,1,fp8,fp8,0,0.04944480061531067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,float16,0,0.05038560032844543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,8,128,1,fp8,fp8,0,0.08226879835128784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,fp8,0,0.04945439994335175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,8,4,128,1,fp8,fp8,0,0.049563199281692505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,float16,0,0.24025120735168456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,float16,0,0.03498240113258362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,fp8,0,0.03187359869480133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,8,128,1,fp8,fp8,0,0.030953601002693176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,fp8,0,0.03107520043849945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,1,128,1,fp8,fp8,0,0.030988800525665283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,float16,0,0.030990400910377504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,fp8,0,0.031001600623130798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,2,128,1,fp8,fp8,0,0.03290880024433136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,fp8,0,0.07828959822654724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,float16,0,0.03292959928512573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,fp8,0,0.032915198802948
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,4,128,1,fp8,fp8,0,0.030964800715446474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,float16,0,0.030905601382255555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,fp8,0,0.02895520031452179
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,8,128,1,fp8,fp8,0,0.02890239953994751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,float16,0,0.02892000079154968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,fp8,0,0.028960001468658448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,1,128,1,fp8,fp8,0,0.02882719933986664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,float16,0,0.02900159955024719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,fp8,0,0.0289247989654541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,2,128,1,fp8,fp8,0,0.028787198662757873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,float16,0,0.028892800211906433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,fp8,0,0.02884480059146881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,8,4,128,1,fp8,fp8,0,0.028780800104141236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,float16,0,0.2961663961410522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,fp8,0,0.33628320693969727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,8,1,128,1,fp8,fp8,0,0.3356800079345703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,float16,0,0.302020788192749
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,fp8,0,0.33781919479370115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,8,2,128,1,fp8,fp8,0,0.3363712072372437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,float16,0,0.3367775917053223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,fp8,0,0.3398159980773926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,8,4,128,1,fp8,fp8,0,0.3389695882797241
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,float16,0,0.21097280979156494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,fp8,0,0.1825055956840515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,8,128,1,fp8,fp8,0,0.18242239952087402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,float16,0,0.15694400072097778
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,fp8,0,0.17678719758987427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,1,128,1,fp8,fp8,0,0.17660800218582154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,float16,0,0.16103999614715575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,fp8,0,0.1773311972618103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,2,128,1,fp8,fp8,0,0.17701280117034912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,float16,0,0.031062400341033934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,float16,0,0.17968000173568727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,fp8,0,0.1801200032234192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,float16,0,0.11515840291976928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,8,4,128,1,fp8,fp8,0,0.17850079536437988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,fp8,0,0.1006384015083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,8,128,1,fp8,fp8,0,0.10050079822540284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,float16,0,0.08731840252876281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,fp8,0,0.09641119837760925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,1,128,1,fp8,fp8,0,0.09577919840812683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,float16,0,0.08909119963645935
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,fp8,0,0.09651039838790894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,2,128,1,fp8,fp8,0,0.0963424026966095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,float16,0,0.1001039981842041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,fp8,0,0.09833279848098755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,8,4,128,1,fp8,fp8,0,0.0966319978237152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,fp8,0,0.05749599933624268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,8,128,1,fp8,fp8,0,0.05664960145950317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,float16,0,0.053311997652053834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,fp8,0,0.05589119791984558
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,1,128,1,fp8,fp8,0,0.0570576012134552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,float16,0,0.05177599787712097
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,fp8,0,0.056857597827911374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,2,128,1,fp8,fp8,0,0.055587202310562134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,float16,0,0.05763999819755554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,fp8,0,0.05568320155143738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,4,128,1,fp8,fp8,0,0.056655997037887575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,float16,0,0.04115999937057495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,fp8,0,0.0371535986661911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,8,128,1,fp8,fp8,0,0.037036800384521486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,float16,0,0.03653759956359863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,fp8,0,0.03699840009212494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,1,128,1,fp8,fp8,0,0.037031999230384825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,float16,0,0.03525440096855163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,fp8,0,0.03704479932785034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,float16,0,0.03710559904575348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,fp8,0,0.03714880049228668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,4,128,1,fp8,fp8,0,0.037062400579452516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,float16,0,0.02874079942703247
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,fp8,0,0.026833599805831908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,8,128,1,fp8,fp8,0,0.026742398738861084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,float16,0,0.02496480047702789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,fp8,0,0.026705598831176756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,1,128,1,fp8,fp8,0,0.026732799410820008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,float16,0,0.026631999015808105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,fp8,0,0.026708799600601196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,float16,0,0.06786080002784729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,2,128,1,fp8,fp8,0,0.026710399985313417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,float16,0,0.026732799410820008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,fp8,0,0.026617598533630372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,8,4,128,1,fp8,fp8,0,0.02680160105228424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,float16,0,0.026743999123573302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,fp8,0,0.024702399969100952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,8,128,1,fp8,fp8,0,0.024748800694942473
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,float16,0,0.024700799584388734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,fp8,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,float16,0,0.024784000217914583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,1,128,1,fp8,fp8,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,fp8,0,0.024695999920368195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,2,128,1,fp8,fp8,0,0.024641600251197816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,float16,0,0.02484000027179718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,fp8,0,0.024830399453639983
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,8,4,128,1,fp8,fp8,0,0.024646399915218352
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,float16,0,0.31731998920440674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,8,2,128,1,fp8,fp8,0,0.03700799942016601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,fp8,0,0.3838848114013672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,float16,0,0.3240047931671143
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,fp8,0,0.38438239097595217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,8,2,128,1,fp8,fp8,0,0.3851999998092651
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,float16,0,0.3678384065628052
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,fp8,0,0.3859424114227295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,8,4,128,1,fp8,fp8,0,0.38744800090789794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,float16,0,0.23429598808288574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,fp8,0,0.20354719161987306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,8,128,1,fp8,fp8,0,0.204367995262146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,float16,0,0.1668992042541504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,fp8,0,0.19891999959945678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,1,128,1,fp8,fp8,0,0.19901759624481202
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,float16,0,0.17034399509429932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,fp8,0,0.19963040351867675
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,2,128,1,fp8,fp8,0,0.200545597076416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,float16,0,0.19265279769897461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,fp8,0,0.20107679367065429
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,float16,0,0.1262096047401428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,8,4,128,1,fp8,fp8,0,0.20227200984954835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,fp8,0,0.11051839590072632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,8,128,1,fp8,fp8,0,0.11080960035324097
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,fp8,0,0.10663360357284546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,1,128,1,fp8,fp8,0,0.10670080184936523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,float16,0,0.09390400052070617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,fp8,0,0.10680639743804932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,2,128,1,fp8,fp8,0,0.1067855954170227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,float16,0,0.10482079982757568
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,fp8,0,0.10767840147018433
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,4,128,1,fp8,fp8,0,0.1084015965461731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,float16,0,0.0721679985523224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,fp8,0,0.061776000261306765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,8,128,1,fp8,fp8,0,0.06165120005607605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,float16,0,0.05332319736480713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,fp8,0,0.05947840213775635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,1,128,1,fp8,fp8,0,0.05960000157356262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,float16,0,0.053529602289199826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,fp8,0,0.05963199734687805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,2,128,1,fp8,fp8,0,0.05959039926528931
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,float16,0,0.059772801399230954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,fp8,0,0.059596800804138185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,8,4,128,1,fp8,fp8,0,0.05967519879341125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,float16,0,0.04198080003261566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,fp8,0,0.037099200487136844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,8,128,1,fp8,fp8,0,0.03715679943561554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,float16,0,0.034790399670600894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,fp8,0,0.03702400028705597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,float16,0,0.09287359714508056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,1,128,1,fp8,fp8,0,0.03710399866104126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,float16,0,0.0348688006401062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,fp8,0,0.03711360096931458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,2,128,1,fp8,fp8,0,0.03722879886627197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,fp8,0,0.03717760145664215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,4,128,1,fp8,fp8,0,0.03713119924068451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,float16,0,0.02486560046672821
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,fp8,0,0.022860799729824067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,8,128,1,fp8,fp8,0,0.022841599583625794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,float16,0,0.022681599855422972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,8,1,128,1,fp8,fp8,0,0.3836159944534302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,fp8,0,0.022915199398994446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,1,128,1,fp8,fp8,0,0.02276960015296936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,float16,0,0.022703999280929567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,fp8,0,0.02271520048379898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,2,128,1,fp8,fp8,0,0.023838399350643157
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,float16,0,0.02272160053253174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,fp8,0,0.02431679964065552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,8,4,128,1,fp8,fp8,0,0.022732800245285033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,float16,0,0.022672000527381896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,fp8,0,0.020640000700950623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,8,128,1,fp8,fp8,0,0.02078240066766739
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,float16,0,0.020656000077724456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,fp8,0,0.020550400018692017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,1,128,1,fp8,fp8,0,0.020659199357032774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,float16,0,0.020664000511169435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,fp8,0,0.020670400559902193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,2,128,1,fp8,fp8,0,0.020670400559902193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,float16,0,0.020665599405765532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,fp8,0,0.0208064004778862
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,8,4,128,1,fp8,fp8,0,0.020636799931526183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,float16,0,0.02085600048303604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,fp8,0,0.01871519982814789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,float16,0,0.020636799931526183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,fp8,0,0.01868959963321686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,1,128,1,fp8,fp8,0,0.01873439997434616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,float16,0,0.020582400262355804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,fp8,0,0.018697600066661834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,2,128,1,fp8,fp8,0,0.018603199720382692
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,float16,0,0.036689600348472594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,float16,0,0.020632000267505647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,fp8,0,0.01980320066213608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,4,128,1,fp8,fp8,0,0.01863359957933426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,8,1,128,1,float16,float16,0,0.24867680072784423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,8,1,128,1,float16,fp8,0,0.3226448059082031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,8,1,128,1,fp8,fp8,0,0.323686408996582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,8,2,128,1,float16,float16,0,0.25358240604400634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,8,2,128,1,float16,fp8,0,0.32333760261535643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,8,2,128,1,fp8,fp8,0,0.32395360469818113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,8,4,128,1,float16,float16,0,0.2937743902206421
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,8,4,128,1,float16,fp8,0,0.3244096040725708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,8,4,128,1,fp8,fp8,0,0.3245136022567749
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,8,128,1,float16,float16,0,0.19506239891052246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,8,128,1,float16,fp8,0,0.17023359537124633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,8,128,1,fp8,fp8,0,0.17082079648971557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,1,128,1,float16,float16,0,0.1318303942680359
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,1,128,1,float16,fp8,0,0.16807839870452881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,1,128,1,fp8,fp8,0,0.1681167960166931
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,2,128,1,float16,fp8,0,0.1681391954421997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,2,128,1,fp8,fp8,0,0.16813280582427978
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,8,8,128,1,fp8,fp8,0,0.018644799292087556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,4,128,1,float16,float16,0,0.15395679473876953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,4,128,1,float16,fp8,0,0.1691663980484009
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,4,128,1,fp8,fp8,0,0.16911519765853883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,8,128,1,float16,float16,0,0.10467840433120727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,8,128,1,float16,fp8,0,0.09246399998664856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,8,128,1,fp8,fp8,0,0.09249759912490844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,1,128,1,float16,fp8,0,0.08942400217056275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,1,128,1,fp8,fp8,0,0.09040319919586182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,2,128,1,float16,float16,0,0.0740239977836609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,2,128,1,float16,fp8,0,0.09036319851875305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,2,128,1,fp8,fp8,0,0.09005119800567626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,4,128,1,float16,float16,0,0.0849568009376526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,4,128,1,float16,fp8,0,0.09040639996528625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,4,128,1,fp8,fp8,0,0.09060320258140564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,8,128,1,float16,float16,0,0.05968800187110901
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,8,128,1,float16,fp8,0,0.05142719745635986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,8,128,1,fp8,fp8,0,0.051692801713943484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,1,128,1,float16,float16,0,0.041654399037361144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,1,128,1,float16,fp8,0,0.04938719868659973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,1,128,1,fp8,fp8,0,0.049798399209976196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,2,128,1,float16,float16,0,0.04212000072002411
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,2,128,1,float16,fp8,0,0.05023999810218811
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,2,128,1,fp8,fp8,0,0.04943200051784515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,4,128,1,float16,float16,0,0.04919840097427368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,4,128,1,fp8,fp8,0,0.050964802503585815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,8,128,1,float16,float16,0,0.035025599598884585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,8,2,128,1,float16,float16,0,0.13339040279388428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,8,128,1,float16,fp8,0,0.030833598971366883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,8,128,1,fp8,fp8,0,0.030868801474571227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,1,128,1,float16,float16,0,0.026903998851776124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,1,128,1,float16,fp8,0,0.03091199994087219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,1,128,1,fp8,fp8,0,0.030883198976516722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,8,1,128,1,float16,float16,0,0.07289440035820008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,2,128,1,float16,float16,0,0.026795199513435362
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,2,128,1,float16,fp8,0,0.03091999888420105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,2,128,1,fp8,fp8,0,0.030836799740791322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,4,128,1,float16,float16,0,0.028971201181411742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,4,128,1,float16,fp8,0,0.030811199545860292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,8,4,128,1,fp8,fp8,0,0.030939200520515443
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,8,128,1,float16,float16,0,0.020662400126457214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,8,128,1,float16,fp8,0,0.018760000169277192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,8,128,1,fp8,fp8,0,0.019356800615787505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,1,128,1,float16,float16,0,0.018348799645900728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,1,128,1,float16,fp8,0,0.01866399943828583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,1,128,1,fp8,fp8,0,0.01871200054883957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,2,128,1,float16,float16,0,0.017001600563526155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,2,128,1,float16,fp8,0,0.01889120042324066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,2,128,1,fp8,fp8,0,0.01977760046720505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,4,128,1,float16,float16,0,0.018716800212860107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,4,128,1,float16,fp8,0,0.018980799615383147
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,8,128,1,float16,float16,0,0.016575999557971954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,8,128,1,float16,fp8,0,0.016638399660587312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,8,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,1,128,1,float16,float16,0,0.015072000026702882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,1,128,1,float16,fp8,0,0.016499200463294984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,1,128,1,fp8,fp8,0,0.01659359931945801
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,2,128,1,float16,float16,0,0.015625600516796113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,2,128,1,float16,fp8,0,0.01656000018119812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,2,128,1,fp8,fp8,0,0.016564799845218657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,4,128,1,float16,float16,0,0.016550399363040924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,4,128,1,float16,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,8,4,128,1,fp8,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,8,128,1,float16,float16,0,0.016521599888801575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,8,128,1,float16,fp8,0,0.014601600170135499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,8,128,1,fp8,fp8,0,0.014684799313545226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,1,128,1,float16,float16,0,0.01451359987258911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,1,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,1,128,1,fp8,fp8,0,0.01454399973154068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,2,128,1,float16,float16,0,0.014507199823856353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,2,128,1,float16,fp8,0,0.014524799585342408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,2,128,1,fp8,fp8,0,0.014587199687957764
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,4,128,1,float16,float16,0,0.014542399346828461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,4,128,1,float16,fp8,0,0.014564800262451171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,8,4,128,1,fp8,fp8,0,0.020652799308300017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,8,4,128,1,fp8,fp8,0,0.014497600495815277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,8,128,1,float16,float16,0,0.016487999260425566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,8,128,1,float16,fp8,0,0.014481599628925323
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,1,128,1,float16,float16,0,0.014654399454593658
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,8,128,1,fp8,fp8,0,0.014796799421310425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,1,128,1,float16,fp8,0,0.014500799775123595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,1,128,1,fp8,fp8,0,0.014740799367427827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,2,128,1,float16,float16,0,0.014644800126552582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,2,128,1,fp8,fp8,0,0.014638400077819825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,4,128,1,float16,float16,0,0.015590399503707886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,4,128,1,float16,fp8,0,0.014766399562358857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,4,128,1,fp8,fp8,0,0.0146479994058609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,8,1,128,1,float16,float16,0,0.11172640323638916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,8,1,128,1,float16,fp8,0,0.15015360116958618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,8,1,128,1,fp8,fp8,0,0.14996320009231567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,8,2,128,1,float16,float16,0,0.1139407992362976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,8,2,128,1,float16,fp8,0,0.1504080057144165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,8,2,128,1,fp8,fp8,0,0.15
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,8,4,128,1,float16,float16,0,0.13435360193252563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,8,4,128,1,float16,fp8,0,0.1508512020111084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,8,4,128,1,fp8,fp8,0,0.1512079954147339
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,8,128,1,float16,float16,0,0.0942255973815918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,8,128,1,float16,fp8,0,0.08162400126457214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,8,128,1,fp8,fp8,0,0.08192960023880005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,1,128,1,float16,float16,0,0.061768001317977904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,1,128,1,float16,fp8,0,0.07996479868888855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,1,128,1,fp8,fp8,0,0.08002240061759949
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,2,128,1,float16,float16,0,0.06346719861030578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,2,128,1,float16,fp8,0,0.08017759919166564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,2,128,1,fp8,fp8,0,0.08004959821701049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,4,128,1,float16,float16,0,0.07309120297431945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,4,128,1,float16,fp8,0,0.08020319938659667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,8,4,128,1,fp8,fp8,0,0.08014240264892578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,8,128,1,float16,float16,0,0.05524320006370544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,8,128,1,float16,fp8,0,0.04731839895248413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,8,128,1,fp8,fp8,0,0.047244799137115476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,1,128,1,float16,float16,0,0.03671039938926697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,1,128,1,float16,fp8,0,0.045259198546409606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,1,128,1,fp8,fp8,0,0.045311999320983884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,2,128,1,float16,float16,0,0.03706879913806915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,2,128,1,float16,fp8,0,0.045256000757217404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,8,4,128,1,float16,fp8,0,0.049379199743270874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,2,128,1,fp8,fp8,0,0.04522719979286194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,8,2,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,4,128,1,float16,float16,0,0.04324640035629272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,4,128,1,float16,fp8,0,0.045270401239395144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,8,128,1,float16,float16,0,0.03052000105381012
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,8,128,1,float16,fp8,0,0.026817598938941957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,8,128,1,fp8,fp8,0,0.026822400093078614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,1,128,1,float16,float16,0,0.02273920029401779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,1,128,1,float16,fp8,0,0.026704001426696777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,1,128,1,fp8,fp8,0,0.026796799898147584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,2,128,1,float16,float16,0,0.02268480062484741
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,2,128,1,float16,fp8,0,0.026796799898147584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,2,128,1,fp8,fp8,0,0.026824000477790832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,4,128,1,float16,float16,0,0.0247856006026268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,4,128,1,float16,fp8,0,0.026804798841476442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,8,4,128,1,fp8,fp8,0,0.02677760124206543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,8,128,1,float16,float16,0,0.0186271995306015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,8,128,1,float16,fp8,0,0.016689600050449373
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,8,128,1,fp8,fp8,0,0.016649599373340606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,1,128,1,float16,float16,0,0.01465120017528534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,1,128,1,float16,fp8,0,0.01669439971446991
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,2,128,1,float16,float16,0,0.014603200554847717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,2,128,1,float16,fp8,0,0.016648000478744505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,2,128,1,fp8,fp8,0,0.016582399606704712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,4,128,1,float16,float16,0,0.01652639955282211
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,4,128,1,float16,fp8,0,0.01659359931945801
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,4,128,1,fp8,fp8,0,0.01658879965543747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,8,128,1,float16,float16,0,0.014596800506114959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,8,128,1,float16,fp8,0,0.014519999921321868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,8,128,1,fp8,fp8,0,0.01448799967765808
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,1,128,1,float16,float16,0,0.012585599720478059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,1,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,1,128,1,fp8,fp8,0,0.014486399292945863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,2,128,1,float16,float16,0,0.012555199861526489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,2,128,1,float16,fp8,0,0.014519999921321868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,2,128,1,fp8,fp8,0,0.01467519998550415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,4,128,1,float16,float16,0,0.01454399973154068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,8,4,128,1,fp8,fp8,0,0.045256000757217404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,4,128,1,float16,fp8,0,0.014478400349617004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,8,4,128,1,fp8,fp8,0,0.014708800613880158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,8,128,1,float16,float16,0,0.014478400349617004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,8,128,1,float16,fp8,0,0.014486399292945863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,1,128,1,float16,float16,0,0.012582400441169738
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,1,128,1,float16,fp8,0,0.01446399986743927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,1,128,1,fp8,fp8,0,0.014467200636863709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,2,128,1,float16,float16,0,0.01279039978981018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,2,128,1,float16,fp8,0,0.014484800398349762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,2,128,1,fp8,fp8,0,0.014455999433994293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,4,128,1,float16,float16,0,0.01266240030527115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,4,128,1,float16,fp8,0,0.012729600071907043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,4,128,1,fp8,fp8,0,0.012724800407886505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,8,128,1,float16,float16,0,0.012606400251388549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,8,128,1,float16,fp8,0,0.012593600153923034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,8,128,1,fp8,fp8,0,0.012580800056457519
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,1,128,1,float16,float16,0,0.01255200058221817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,1,128,1,float16,fp8,0,0.012646399438381195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,1,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,2,128,1,float16,float16,0,0.012595200538635254
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,2,128,1,float16,fp8,0,0.012604799866676331
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,2,128,1,fp8,fp8,0,0.012668800354003907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,4,128,1,float16,float16,0,0.01249760016798973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,4,128,1,float16,fp8,0,0.012563200294971466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,8,4,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,8,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,8,128,1,fp8,fp8,0,0.012430399656295776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,1,128,1,float16,float16,0,0.012467200309038163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,1,128,1,float16,fp8,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,8,8,128,1,fp8,fp8,0,0.012532800436019897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,1,128,1,fp8,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,2,128,1,float16,float16,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,2,128,1,float16,fp8,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,4,128,1,float16,float16,0,0.012548799812793731
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,2,128,1,fp8,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,4,128,1,float16,fp8,0,0.012432000041007996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,4,128,1,fp8,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,8,1,128,1,fp8,fp8,0,0.016683200001716615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,8,1,128,1,float16,fp8,0,0.08854560256004333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,8,2,128,1,float16,float16,0,0.07021600008010864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,8,1,128,1,fp8,fp8,0,0.08836479783058167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,8,2,128,1,float16,fp8,0,0.08842399716377258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,8,2,128,1,fp8,fp8,0,0.08993600010871887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,8,4,128,1,float16,float16,0,0.0794368028640747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,8,4,128,1,float16,fp8,0,0.09027680158615112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,8,4,128,1,fp8,fp8,0,0.08977280259132385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,8,128,1,float16,float16,0,0.05390560030937195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,8,128,1,float16,fp8,0,0.049318400025367734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,8,128,1,fp8,fp8,0,0.0494623988866806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,1,128,1,float16,float16,0,0.03797439932823181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,1,128,1,float16,fp8,0,0.047409600019454955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,1,128,1,fp8,fp8,0,0.04738239943981171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,2,128,1,float16,float16,0,0.03918719887733459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,2,128,1,float16,fp8,0,0.04723680019378662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,4,128,1,float16,float16,0,0.04439679980278015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,4,128,1,float16,fp8,0,0.047409600019454955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,4,128,1,fp8,fp8,0,0.04728800058364868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,8,8,128,1,float16,float16,0,0.01268800050020218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,8,128,1,float16,float16,0,0.0325984001159668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,8,128,1,float16,fp8,0,0.028932800889015196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,8,128,1,fp8,fp8,0,0.028881600499153136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,1,128,1,float16,float16,0,0.024726399779319765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,1,128,1,float16,fp8,0,0.02889440059661865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,1,128,1,fp8,fp8,0,0.028966400027275085
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,2,128,1,float16,float16,0,0.024707199633121492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,2,128,1,float16,fp8,0,0.02885279953479767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,2,128,1,fp8,fp8,0,0.028857600688934327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,4,128,1,float16,float16,0,0.026825600862503053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,4,128,1,float16,fp8,0,0.02876960039138794
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,8,4,128,1,fp8,fp8,0,0.028939199447631837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,8,128,1,float16,float16,0,0.018632000684738158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,8,128,1,float16,fp8,0,0.018676799535751343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,8,128,1,fp8,fp8,0,0.01865279972553253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,1,128,1,float16,float16,0,0.01652960032224655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,8,1,128,1,float16,float16,0,0.06977919936180114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,1,128,1,float16,fp8,0,0.018624000251293182
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,1,128,1,fp8,fp8,0,0.018619200587272643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,2,128,1,float16,float16,0,0.01653439998626709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,2,128,1,float16,fp8,0,0.018593600392341612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,2,128,1,fp8,fp8,0,0.018607999384403228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,4,128,1,float16,float16,0,0.01663520038127899
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,4,128,1,float16,fp8,0,0.018592000007629395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,8,4,128,1,fp8,fp8,0,0.018572799861431122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,8,128,1,float16,float16,0,0.012783999741077422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,8,128,1,float16,fp8,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,8,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,1,128,1,float16,float16,0,0.011876799911260606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,1,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,1,128,1,fp8,fp8,0,0.012468799948692322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,2,128,1,float16,float16,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,2,128,1,float16,fp8,0,0.01244639977812767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,2,128,1,fp8,fp8,0,0.012459199875593185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,4,128,1,float16,float16,0,0.012435200065374375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,4,128,1,float16,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,8,2,128,1,fp8,fp8,0,0.04737600088119507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,8,128,1,float16,float16,0,0.012358400225639343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,8,128,1,float16,fp8,0,0.012436799705028534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,8,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,1,128,1,float16,float16,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,1,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,2,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,1,128,1,fp8,fp8,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,2,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,2,128,1,fp8,fp8,0,0.011027199774980545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,4,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,4,128,1,float16,fp8,0,0.010660800337791442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,8,4,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,8,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,8,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,1,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,1,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,2,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,2,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,2,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,4,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,4,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,4,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,8,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,8,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,8,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,1,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,1,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,1,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,2,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,8,4,128,1,fp8,fp8,0,0.012564800679683685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,2,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,2,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,4,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,4,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,8,4,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,8,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,8,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,8,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,1,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,2,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,8,8,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,2,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,2,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,4,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,4,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,8,1,128,1,float16,fp8,0,0.0637279987335205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,8,1,128,1,float16,float16,0,0.05552639961242676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,8,1,128,1,fp8,fp8,0,0.06362239718437195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,8,2,128,1,float16,float16,0,0.05750719904899597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,8,2,128,1,float16,fp8,0,0.06378560066223145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,8,2,128,1,fp8,fp8,0,0.06373760104179382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,8,4,128,1,float16,float16,0,0.061827200651168826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,8,4,128,1,float16,fp8,0,0.06373599767684937
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,8,128,1,float16,float16,0,0.0392304003238678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,8,4,128,1,fp8,fp8,0,0.06369119882583618
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,8,128,1,float16,fp8,0,0.0350928008556366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,8,128,1,fp8,fp8,0,0.035129600763320924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,1,128,1,float16,fp8,0,0.035097599029541016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,1,128,1,fp8,fp8,0,0.03515680134296417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,2,128,1,float16,float16,0,0.031097599864006044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,2,128,1,float16,fp8,0,0.035174399614334106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,2,128,1,fp8,fp8,0,0.03511039912700653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,4,128,1,float16,float16,0,0.03304960131645203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,4,128,1,float16,fp8,0,0.035104000568389894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,4,128,1,fp8,fp8,0,0.035062399506568906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,8,128,1,float16,float16,0,0.023311999440193177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,8,128,1,float16,fp8,0,0.022627200186252593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,8,128,1,fp8,fp8,0,0.02272160053253174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,1,128,1,float16,float16,0,0.02083519995212555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,1,128,1,float16,fp8,0,0.02258719950914383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,1,128,1,fp8,fp8,0,0.022777600586414336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,2,128,1,float16,float16,0,0.020815999805927278
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,2,128,1,float16,fp8,0,0.0226623997092247
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,2,128,1,fp8,fp8,0,0.022814400494098663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,4,128,1,float16,float16,0,0.022588799893856048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,4,128,1,float16,fp8,0,0.022728000581264497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,8,4,128,1,fp8,fp8,0,0.022681599855422972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,8,128,1,float16,float16,0,0.016596800088882445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,8,128,1,float16,fp8,0,0.014705599844455719
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,8,128,1,fp8,fp8,0,0.01462399959564209
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,1,128,1,float16,float16,0,0.014571200311183929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,1,128,1,float16,fp8,0,0.014646400511264802
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,1,128,1,fp8,fp8,0,0.014740799367427827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,8,4,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,2,128,1,float16,float16,0,0.01446239948272705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,2,128,1,fp8,fp8,0,0.014659200608730317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,4,128,1,float16,float16,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,4,128,1,float16,fp8,0,0.014665600657463074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,4,128,1,fp8,fp8,0,0.014699199795722961
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,8,128,1,float16,float16,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,8,1,128,1,float16,float16,0,0.030910399556159974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,8,128,1,float16,fp8,0,0.012403199821710587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,8,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,1,128,1,float16,float16,0,0.010609599947929382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,1,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,1,128,1,fp8,fp8,0,0.012441600114107132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,2,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,2,128,1,float16,fp8,0,0.012371200323104858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,2,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,4,128,1,float16,float16,0,0.011707200109958649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,4,128,1,fp8,fp8,0,0.012425599992275238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,8,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,8,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,8,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,1,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,1,128,1,float16,fp8,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,1,128,1,fp8,fp8,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,2,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,2,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,2,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,4,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,4,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,8,4,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,8,128,1,float16,float16,0,0.010335999727249145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,8,2,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,8,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,8,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,1,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,1,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,1,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,2,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,2,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,4,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,4,128,1,float16,fp8,0,0.010332799702882766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,8,4,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,8,128,1,float16,fp8,0,0.009564799815416336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,1,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,1,128,1,float16,fp8,0,0.009217599779367447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,1,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,2,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,8,4,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,2,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,2,128,1,fp8,fp8,0,0.009428799897432328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,4,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,4,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,4,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,8,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,8,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,8,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,1,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,1,128,1,float16,fp8,0,0.010320000350475311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,1,128,1,fp8,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,2,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,2,128,1,float16,fp8,0,0.009321600198745728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,2,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,4,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,4,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,8,1,128,1,float16,float16,0,0.047366398572921756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,8,1,128,1,float16,fp8,0,0.04968160092830658
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,8,1,128,1,fp8,fp8,0,0.04989599883556366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,8,2,128,1,float16,float16,0,0.04731999933719635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,8,2,128,1,float16,fp8,0,0.05044320225715637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,8,2,128,1,fp8,fp8,0,0.05138239860534668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,8,4,128,1,float16,float16,0,0.049409601092338565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,8,4,128,1,float16,fp8,0,0.051419198513031006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,8,4,128,1,fp8,fp8,0,0.05135200023651123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,8,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,8,128,1,float16,float16,0,0.030924800038337707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,8,128,1,float16,fp8,0,0.0289247989654541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,8,128,1,fp8,fp8,0,0.02890079915523529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,1,128,1,float16,float16,0,0.02889440059661865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,1,128,1,float16,fp8,0,0.028862398862838746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,1,128,1,fp8,fp8,0,0.028974398970603943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,2,128,1,float16,fp8,0,0.029075199365615846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,2,128,1,fp8,fp8,0,0.028915199637413024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,4,128,1,float16,float16,0,0.02901119887828827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,4,128,1,float16,fp8,0,0.02900480031967163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,4,128,1,fp8,fp8,0,0.028958401083946227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,8,128,1,float16,float16,0,0.0206496000289917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,8,128,1,float16,fp8,0,0.018806399405002595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,8,4,128,1,fp8,fp8,0,0.009192000329494476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,8,128,1,fp8,fp8,0,0.01876160055398941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,1,128,1,float16,float16,0,0.018632000684738158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,1,128,1,float16,fp8,0,0.018750399351119995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,1,128,1,fp8,fp8,0,0.019312000274658202
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,2,128,1,float16,float16,0,0.01865919977426529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,2,128,1,float16,fp8,0,0.019424000382423402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,2,128,1,fp8,fp8,0,0.019097599387168884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,4,128,1,float16,float16,0,0.01873600035905838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,4,128,1,float16,fp8,0,0.01879040002822876
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,8,4,128,1,fp8,fp8,0,0.018822400271892546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,8,128,1,float16,float16,0,0.014542399346828461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,8,128,1,float16,fp8,0,0.014480000734329224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,8,8,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,8,128,1,fp8,fp8,0,0.014504000544548035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,1,128,1,float16,float16,0,0.014467200636863709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,1,128,1,float16,fp8,0,0.01454399973154068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,1,128,1,fp8,fp8,0,0.014459200203418732
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,2,128,1,float16,float16,0,0.014420799911022186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,2,128,1,float16,fp8,0,0.014372800290584565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,2,128,1,fp8,fp8,0,0.014451199769973755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,4,128,1,float16,float16,0,0.014406399428844452
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,4,128,1,float16,fp8,0,0.014374400675296783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,8,4,128,1,fp8,fp8,0,0.014361600577831268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,8,128,1,float16,float16,0,0.010633599758148194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,8,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,8,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,1,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,1,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,1,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,2,128,1,float16,float16,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,2,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,2,128,1,fp8,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,4,128,1,float16,float16,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,4,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,8,4,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,8,128,1,float16,fp8,0,0.010283199697732925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,8,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,1,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,1,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,1,128,1,fp8,fp8,0,0.010276799649000167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,2,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,2,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,2,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,4,128,1,float16,float16,0,0.010320000350475311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,4,128,1,float16,fp8,0,0.010299199819564819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,4,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,8,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,8,128,1,float16,fp8,0,0.010308799892663955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,8,128,1,fp8,fp8,0,0.010335999727249145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,1,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,1,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,8,2,128,1,float16,float16,0,0.02884640097618103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,1,128,1,fp8,fp8,0,0.010283199697732925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,2,128,1,float16,float16,0,0.009862399846315383
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,2,128,1,float16,fp8,0,0.010334400087594986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,2,128,1,fp8,fp8,0,0.00843840017914772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,4,128,1,float16,float16,0,0.010315199941396713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,4,128,1,float16,fp8,0,0.008476799726486206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,8,4,128,1,fp8,fp8,0,0.010291200131177902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,8,128,1,float16,float16,0,0.00968799963593483
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,8,128,1,float16,fp8,0,0.009622400254011154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,8,128,1,fp8,fp8,0,0.009753599762916565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,1,128,1,float16,float16,0,0.010329599678516387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,1,128,1,float16,fp8,0,0.009279999881982803
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,1,128,1,fp8,fp8,0,0.00942559987306595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,2,128,1,float16,float16,0,0.009164799749851228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,2,128,1,float16,fp8,0,0.009480000287294389
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,2,128,1,fp8,fp8,0,0.009678400307893752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,4,128,1,float16,float16,0,0.008875200152397155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,4,128,1,float16,fp8,0,0.009804800152778625
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,8,4,128,1,fp8,fp8,0,0.009518399834632874
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,8,128,1,float16,float16,0,0.008422400057315826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,8,128,1,fp8,fp8,0,0.009230399876832962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,1,128,1,float16,float16,0,0.0086496002972126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,1,128,1,fp8,fp8,0,0.008529599756002426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,2,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,2,128,1,float16,fp8,0,0.008499199897050858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,2,128,1,fp8,fp8,0,0.008748800307512284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,4,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,4,128,1,float16,fp8,0,0.008752000331878663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,4,128,1,fp8,fp8,0,0.008755200356245042
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,8,1,128,1,float16,float16,0,0.04532800018787384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,8,1,128,1,float16,fp8,0,0.04528320133686066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,8,1,128,1,fp8,fp8,0,0.04530879855155945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,8,2,128,1,float16,float16,0,0.04533439874649048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,8,2,128,1,float16,fp8,0,0.045281600952148435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,8,2,128,1,fp8,fp8,0,0.04532800018787384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,8,4,128,1,float16,float16,0,0.04723840057849884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,8,4,128,1,float16,fp8,0,0.04528799951076508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,8,4,128,1,fp8,fp8,0,0.04528000056743622
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,8,128,1,float16,float16,0,0.02885119915008545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,8,128,1,float16,fp8,0,0.026815998554229736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,8,128,1,fp8,fp8,0,0.026836800575256347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,8,8,128,1,float16,float16,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,8,128,1,float16,fp8,0,0.0091839998960495
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,1,128,1,float16,fp8,0,0.026923200488090514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,8,1,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,1,128,1,fp8,fp8,0,0.02686559855937958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,2,128,1,float16,float16,0,0.02686559855937958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,2,128,1,float16,fp8,0,0.026862400770187377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,2,128,1,fp8,fp8,0,0.026811200380325317
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,4,128,1,float16,float16,0,0.028835201263427736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,4,128,1,float16,fp8,0,0.027003198862075806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,4,128,1,fp8,fp8,0,0.026836800575256347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,8,128,1,float16,float16,0,0.019079999625682832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,8,128,1,float16,fp8,0,0.018619200587272643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,8,128,1,fp8,fp8,0,0.018555200099945067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,1,128,1,float16,float16,0,0.01865279972553253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,1,128,1,fp8,fp8,0,0.018544000387191773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,2,128,1,float16,float16,0,0.018587200343608855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,2,128,1,float16,fp8,0,0.018665599822998046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,2,128,1,fp8,fp8,0,0.018632000684738158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,4,128,1,float16,float16,0,0.018619200587272643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,4,128,1,float16,fp8,0,0.018622399866580965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,4,128,1,fp8,fp8,0,0.01860959976911545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,8,128,1,float16,float16,0,0.013400000333786011
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,8,128,1,float16,fp8,0,0.012569600343704223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,8,128,1,fp8,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,1,128,1,float16,float16,0,0.012691199779510498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,1,128,1,float16,fp8,0,0.012479999661445617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,1,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,2,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,2,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,8,1,128,1,float16,float16,0,0.026976001262664796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,4,128,1,float16,float16,0,0.012539200484752655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,4,128,1,float16,fp8,0,0.012934400141239167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,4,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,8,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,8,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,8,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,8,1,128,1,float16,fp8,0,0.018481600284576415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,1,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,1,128,1,fp8,fp8,0,0.010308799892663955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,2,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,2,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,2,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,4,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,4,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,8,4,128,1,fp8,fp8,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,8,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,8,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,8,128,1,fp8,fp8,0,0.009556800127029419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,1,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,1,128,1,float16,fp8,0,0.008668799698352814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,1,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,2,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,2,128,1,float16,fp8,0,0.01026879996061325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,2,128,1,fp8,fp8,0,0.009150400012731551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,4,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,4,128,1,float16,fp8,0,0.00958240032196045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,8,4,128,1,fp8,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,8,128,1,float16,float16,0,0.009931199997663499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,8,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,8,128,1,fp8,fp8,0,0.00846560001373291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,1,128,1,float16,float16,0,0.010288000106811523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,1,128,1,float16,fp8,0,0.008433599770069123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,2,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,2,128,1,float16,fp8,0,0.009614399820566177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,2,128,1,fp8,fp8,0,0.010011199861764908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,8,2,128,1,fp8,fp8,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,4,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,4,128,1,float16,fp8,0,0.009851200133562088
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,4,128,1,fp8,fp8,0,0.008422400057315826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,8,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,8,128,1,float16,fp8,0,0.00881119966506958
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,1,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,1,128,1,float16,fp8,0,0.0084927998483181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,1,128,1,fp8,fp8,0,0.008423999696969987
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,2,128,1,float16,float16,0,0.010364799946546554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,2,128,1,float16,fp8,0,0.009220799803733826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,2,128,1,fp8,fp8,0,0.00859839990735054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,4,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,4,128,1,float16,fp8,0,0.008720000088214875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,4,128,1,fp8,fp8,0,0.008449599891901017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,8,128,1,float16,float16,0,0.010307200253009796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,8,128,1,float16,fp8,0,0.0083856001496315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,8,128,1,fp8,fp8,0,0.00873439982533455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,1,128,1,float16,float16,0,0.008454400300979614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,1,128,1,float16,fp8,0,0.008591999858617782
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,1,128,1,fp8,fp8,0,0.008396799862384795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,2,128,1,float16,float16,0,0.010339199751615524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,2,128,1,float16,fp8,0,0.008377599716186523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,2,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,4,128,1,float16,float16,0,0.008416000008583068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,4,128,1,fp8,fp8,0,0.008367999643087386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,0,0.044233599305152894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,0,0.041126400232315063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,8,1,128,1,fp8,fp8,0,0.0412416011095047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,0,0.04518559873104096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,0,0.04153760075569153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,8,2,128,1,fp8,fp8,0,0.0414112001657486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,8,1,128,1,fp8,fp8,0,0.009347199648618697
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,0,0.041331198811531064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,8,4,128,1,fp8,fp8,0,0.04272159934043884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,0,0.0268640011548996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,0,0.0253248006105423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,8,8,128,1,fp8,fp8,0,0.008454400300979614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,8,128,1,fp8,fp8,0,0.025043201446533204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,0,0.026867198944091796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,0,0.024835200607776643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,1,128,1,fp8,fp8,0,0.024796800315380098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,0,0.026919999718666078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,0,0.024958400428295134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,2,128,1,fp8,fp8,0,0.024828800559043886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,0,0.026862400770187377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,0,0.024940800666809083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,8,4,128,1,fp8,fp8,0,0.0249439999461174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,0,0.01852319985628128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,8,4,128,1,float16,fp8,0,0.010308799892663955
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,0,0.017696000635623932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,8,128,1,fp8,fp8,0,0.017100800573825837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,0,0.01857600063085556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,0,0.016760000586509706
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,0,0.01852640062570572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,0,0.016655999422073364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,2,128,1,fp8,fp8,0,0.016686399281024934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,0,0.0186831995844841
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,0,0.01737920045852661
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,4,128,1,fp8,fp8,0,0.017313599586486816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,0,0.045177599787712096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,0,0.014452800154685974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,8,128,1,fp8,fp8,0,0.01255040019750595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,0,0.012756800651550293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,0,0.012598399817943574
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,1,128,1,fp8,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,0,0.014499199390411378
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,2,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,0,0.012545600533485413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,0,0.01255200058221817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,8,4,128,1,fp8,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,8,128,1,fp8,fp8,0,0.010278400033712387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,0,0.010380800068378448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,1,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,2,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,0,0.0103472001850605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,8,4,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,0,0.008675199747085572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,8,128,1,fp8,fp8,0,0.010279999673366546
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,0,0.01035040020942688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,1,128,1,fp8,fp8,0,0.010328000038862228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,8,1,128,1,fp8,fp8,0,0.0173007994890213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,0,0.010344000160694122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,2,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,4,128,1,fp8,fp8,0,0.010265599936246872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,0,0.010334400087594986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,0,0.009644799679517747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,8,128,1,fp8,fp8,0,0.0103472001850605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,0,0.010260800272226334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,1,128,1,fp8,fp8,0,0.010284800082445145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,0,0.010320000350475311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,2,128,1,fp8,fp8,0,0.009247999638319016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,0,0.010339199751615524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,0,0.009089600294828415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,8,4,128,1,fp8,fp8,0,0.01032159999012947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,0,0.010062400251626968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,0,0.010329599678516387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,8,128,1,fp8,fp8,0,0.008425600081682205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,0,0.008430399745702744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,1,128,1,fp8,fp8,0,0.008515200018882752
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,0,0.009662400186061858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,0,0.008526399731636047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,2,128,1,fp8,fp8,0,0.008416000008583068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,0,0.010342399775981902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,0,0.008369600027799606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,8,4,128,1,fp8,fp8,0,0.008504000306129456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,0,0.009511999785900116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,8,128,1,fp8,fp8,0,0.008564800024032593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,0,0.010329599678516387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,0,0.0103472001850605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,1,128,1,fp8,fp8,0,0.010310400277376175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,0,0.010241600126028061
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,2,128,1,fp8,fp8,0,0.010328000038862228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,0,0.010344000160694122
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,0,0.010264000296592713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,4,128,1,fp8,fp8,0,0.010318399965763092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,fp8,0,1.1436047554016113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,0,0.008420799672603608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,4,1,128,1,fp8,fp8,0,1.1438447952270507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,float16,0,1.2988816261291505
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,fp8,0,1.1479999542236328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,4,2,128,1,fp8,fp8,0,1.143779182434082
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,fp8,0,0.6215439796447754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,float16,0,0.7236288070678711
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,4,4,128,1,fp8,fp8,0,0.6191664218902588
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,float16,0,1.2716128349304199
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,fp8,0,0.6148928165435791
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,float16,0,0.7021535873413086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,4,1,128,1,fp8,fp8,0,0.6134367942810058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,fp8,0,0.6232927799224853
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,float16,0,0.6961855888366699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,4,2,128,1,fp8,fp8,0,0.6156191825866699
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,fp8,0,0.3540240049362183
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,float16,0,0.41798081398010256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,4,4,128,1,fp8,fp8,0,0.352345609664917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,float16,0,0.38823039531707765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,4,1,128,1,fp8,fp8,0,0.3499936103820801
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,float16,0,0.3914144039154053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,fp8,0,0.3502943992614746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,4,2,128,1,fp8,fp8,0,0.3505183935165405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,float16,0,0.24648799896240234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,fp8,0,0.21954560279846191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,4,4,128,1,fp8,fp8,0,0.2196336030960083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,float16,0,0.24155039787292482
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,fp8,0,0.22021119594573973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,4,1,128,1,fp8,fp8,0,0.21948640346527098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,float16,0,0.23974080085754396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,fp8,0,0.21924800872802735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,4,2,128,1,fp8,fp8,0,0.21946399211883544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,float16,0,0.7657551765441895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,fp8,0,0.7001760005950928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,fp8,0,0.3507904052734375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,4,1,128,1,fp8,fp8,0,0.7015647888183594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,float16,0,0.7629951953887939
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,fp8,0,0.7073455810546875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,float16,0,0.4409664154052734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,fp8,0,0.39001600742340087
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,4,4,128,1,fp8,fp8,0,0.39067840576171875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,float16,0,0.4111167907714844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,4,1,128,1,fp8,fp8,0,0.38532960414886475
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,float16,0,0.4194943904876709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,fp8,0,0.38738560676574707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,float16,0,0.2680880069732666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,4,2,128,1,fp8,fp8,0,0.3852384090423584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,fp8,0,0.22607040405273438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,4,4,128,1,fp8,fp8,0,0.22595839500427245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,float16,0,0.24426400661468506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,4,1,128,1,fp8,fp8,0,0.22547519207000732
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,float16,0,0.24702239036560059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,fp8,0,0.22613279819488524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,4,2,128,1,fp8,fp8,0,0.22605280876159667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,4,2,128,1,fp8,fp8,0,0.7057119846343994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,float16,0,0.15700960159301758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,fp8,0,0.1436895966529846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,fp8,0,0.38402400016784666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,4,4,128,1,fp8,fp8,0,0.145524799823761
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,fp8,0,0.14358880519866943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,4,1,128,1,fp8,fp8,0,0.14355360269546508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,float16,0,0.1523360013961792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,fp8,0,0.14451999664306642
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,4,2,128,1,fp8,fp8,0,0.14363679885864258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,float16,0,0.5668272018432617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,fp8,0,0.521398401260376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,fp8,0,0.22525439262390137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,4,1,128,1,fp8,fp8,0,0.5236447811126709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,float16,0,0.5562719821929931
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,fp8,0,0.5233920097351075
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,4,2,128,1,fp8,fp8,0,0.525812816619873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,float16,0,0.3316688060760498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,fp8,0,0.29406399726867677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,4,4,128,1,fp8,fp8,0,0.29518558979034426
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,float16,0,0.3036335945129395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,fp8,0,0.2889823913574219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,4,1,128,1,fp8,fp8,0,0.28937439918518065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,float16,0,0.3110352039337158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,fp8,0,0.28980159759521484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,float16,0,0.15357279777526855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,4,2,128,1,fp8,fp8,0,0.29101440906524656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,fp8,0,0.17629120349884034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,float16,0,0.18686239719390868
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,4,4,128,1,fp8,fp8,0,0.17636799812316895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,fp8,0,0.17580480575561525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,4,1,128,1,fp8,fp8,0,0.176363205909729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,float16,0,0.18790559768676757
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,fp8,0,0.17641760110855104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,float16,0,0.11542079448699952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,fp8,0,0.10851520299911499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,4,4,128,1,fp8,fp8,0,0.10846879482269287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,float16,0,0.11291999816894531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,fp8,0,0.10678399801254272
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,4,1,128,1,fp8,fp8,0,0.10790879726409912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,float16,0,0.11296319961547852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,fp8,0,0.1069983959197998
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,4,2,128,1,fp8,fp8,0,0.10858240127563476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,float16,0,0.6947455883026123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,fp8,0,0.6835072040557861
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,4,1,128,1,fp8,fp8,0,0.6875616073608398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,float16,0,0.7128767967224121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,float16,0,0.19925279617309571
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,fp8,0,0.688700819015503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,float16,0,0.40975198745727537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,4,2,128,1,fp8,fp8,0,0.6862559795379639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,4,2,128,1,fp8,fp8,0,0.17656160593032838
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,4,4,128,1,fp8,fp8,0,0.37086238861083987
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,float16,0,0.3771536111831665
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,fp8,0,0.3675343990325928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,4,1,128,1,fp8,fp8,0,0.36567039489746095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,float16,0,0.39235520362854004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,fp8,0,0.3671024084091187
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,float16,0,0.23536319732666017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,4,2,128,1,fp8,fp8,0,0.3698319911956787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,fp8,0,0.21039040088653566
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,4,4,128,1,fp8,fp8,0,0.21171040534973146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,float16,0,0.21071040630340576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,fp8,0,0.20850560665130616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,4,1,128,1,fp8,fp8,0,0.20709760189056398
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,float16,0,0.2198415994644165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,fp8,0,0.20752639770507814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,4,2,128,1,fp8,fp8,0,0.20924479961395265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,float16,0,0.1374511957168579
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,fp8,0,0.1302639961242676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,4,4,128,1,fp8,fp8,0,0.1293280005455017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,float16,0,0.1342144012451172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,fp8,0,0.1293503999710083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,4,1,128,1,fp8,fp8,0,0.12943359613418579
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,fp8,0,0.1312656044960022
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,4,2,128,1,fp8,fp8,0,0.130075204372406
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,float16,0,0.0883520007133484
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,fp8,0,0.08412479758262634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,4,4,128,1,fp8,fp8,0,0.08414400219917298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,float16,0,0.08628640174865723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,fp8,0,0.08400800228118896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,4,1,128,1,fp8,fp8,0,0.0822928011417389
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,float16,0,0.08621280193328858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,fp8,0,0.0842415988445282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,4,2,128,1,fp8,fp8,0,0.08409600257873535
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,float16,0,0.436411190032959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,fp8,0,0.3707504034042358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,fp8,0,0.43440961837768555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,float16,0,0.13331199884414674
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,float16,0,0.43001599311828614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,fp8,0,0.4364640235900879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,4,2,128,1,fp8,fp8,0,0.4365375995635986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,float16,0,0.2614543914794922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,fp8,0,0.23989760875701904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,4,4,128,1,fp8,fp8,0,0.24078240394592285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,float16,0,0.23031680583953856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,fp8,0,0.23650240898132324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,4,1,128,1,fp8,fp8,0,0.2360383987426758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,float16,0,0.23720960617065429
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,fp8,0,0.2380143880844116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,float16,0,0.1540287971496582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,4,2,128,1,fp8,fp8,0,0.23774080276489257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,fp8,0,0.1373952031135559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,4,4,128,1,fp8,fp8,0,0.1374848008155823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,float16,0,0.1332144021987915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,fp8,0,0.13736319541931152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,float16,0,0.13513760566711425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,fp8,0,0.13734400272369385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,float16,0,0.09027519822120667
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,fp8,0,0.08629119992256165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,4,4,128,1,fp8,fp8,0,0.08732320070266723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,float16,0,0.08632320165634155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,fp8,0,0.0870639979839325
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,4,1,128,1,fp8,fp8,0,0.43595519065856936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,4,1,128,1,fp8,fp8,0,0.08822240233421326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,float16,0,0.08763039708137513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,fp8,0,0.08774080276489257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,4,2,128,1,fp8,fp8,0,0.08756800293922425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,float16,0,0.06778720021247864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,fp8,0,0.06562560200691223
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,4,4,128,1,fp8,fp8,0,0.06567360162734985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,float16,0,0.06775199770927429
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,fp8,0,0.06576799750328063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,4,1,128,1,fp8,fp8,0,0.0657039999961853
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,float16,0,0.06572960019111633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,fp8,0,0.06568639874458312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,4,2,128,1,fp8,fp8,0,0.06447200179100036
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,float16,0,0.4153439998626709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,fp8,0,0.4468031883239746
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,4,1,128,1,fp8,fp8,0,0.44845919609069823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,float16,0,0.4163424015045166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,4,1,128,1,fp8,fp8,0,0.13725119829177856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,4,2,128,1,fp8,fp8,0,0.1373855948448181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,fp8,0,0.4510176181793213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,float16,0,0.25269920825958253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,4,2,128,1,fp8,fp8,0,0.44943838119506835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,fp8,0,0.2424015998840332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,4,4,128,1,fp8,fp8,0,0.24349920749664306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,float16,0,0.21903519630432128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,fp8,0,0.23882079124450684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,4,1,128,1,fp8,fp8,0,0.2388144016265869
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,float16,0,0.22815680503845215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,fp8,0,0.2395872116088867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,4,2,128,1,fp8,fp8,0,0.24003360271453858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,float16,0,0.14513280391693115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,fp8,0,0.13705120086669922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,4,4,128,1,fp8,fp8,0,0.13604960441589356
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,float16,0,0.1233247995376587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,fp8,0,0.13368959426879884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,4,1,128,1,fp8,fp8,0,0.13376799821853638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,fp8,0,0.1337551951408386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,4,2,128,1,fp8,fp8,0,0.13346079587936402
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,float16,0,0.08431040048599243
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,fp8,0,0.08221759796142578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,4,4,128,1,fp8,fp8,0,0.08232640027999878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,float16,0,0.07985919713973999
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,fp8,0,0.08227840065956116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,4,1,128,1,fp8,fp8,0,0.08397600054740906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,float16,0,0.07804319858551026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,fp8,0,0.08401920199394226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,float16,0,0.053491199016571046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,4,2,128,1,fp8,fp8,0,0.08216959834098816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,fp8,0,0.051500797271728516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,4,4,128,1,fp8,fp8,0,0.051470398902893066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,float16,0,0.05010560154914856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,fp8,0,0.05155040025711059
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,4,1,128,1,fp8,fp8,0,0.051419198513031006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,float16,0,0.04946399927139282
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,fp8,0,0.05146719813346863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,4,2,128,1,fp8,fp8,0,0.05153120160102844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,float16,0,0.049296000599861146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,fp8,0,0.04735040068626404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,4,4,128,1,fp8,fp8,0,0.047260800004005434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,float16,0,0.047336000204086306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,fp8,0,0.047356799244880676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,4,1,128,1,fp8,fp8,0,0.04738560020923614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,float16,0,0.04739519953727722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,fp8,0,0.047295999526977536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,4,2,128,1,fp8,fp8,0,0.04737919867038727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,float16,0,0.25692000389099123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,fp8,0,0.2977744102478027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,4,1,128,1,fp8,fp8,0,0.29614720344543455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,float16,0,0.26840639114379883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,float16,0,0.12641600370407105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,fp8,0,0.29828639030456544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,float16,0,0.16482880115509033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,4,2,128,1,fp8,fp8,0,0.2996016025543213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,fp8,0,0.16411839723587035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,4,4,128,1,fp8,fp8,0,0.16402080059051513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,float16,0,0.14345920085906982
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,fp8,0,0.1596959948539734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,4,1,128,1,fp8,fp8,0,0.15999360084533693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,float16,0,0.14799200296401976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,fp8,0,0.160971200466156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,float16,0,0.09724159836769104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,4,2,128,1,fp8,fp8,0,0.16098719835281372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,fp8,0,0.09232159852981567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,4,4,128,1,fp8,fp8,0,0.09234240055084228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,fp8,0,0.09204000234603882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,4,1,128,1,fp8,fp8,0,0.09240159988403321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,float16,0,0.08222879767417908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,fp8,0,0.09231520295143128
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,4,2,128,1,fp8,fp8,0,0.09243680238723755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,float16,0,0.05871679782867432
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,fp8,0,0.057713598012924194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,4,4,128,1,fp8,fp8,0,0.05766400098800659
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,float16,0,0.05350559949874878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,fp8,0,0.057601600885391235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,4,1,128,1,fp8,fp8,0,0.057467198371887206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,float16,0,0.053940802812576294
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,4,2,128,1,fp8,fp8,0,0.057657599449157715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,fp8,0,0.057708799839019775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,fp8,0,0.0412447988986969
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,4,4,128,1,fp8,fp8,0,0.041280001401901245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,float16,0,0.03915199935436249
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,fp8,0,0.04132159948348999
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,4,1,128,1,fp8,fp8,0,0.04121760129928589
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,fp8,0,0.04114879965782166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,4,2,128,1,fp8,fp8,0,0.04118399918079376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,float16,0,0.03923520147800445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,fp8,0,0.03810240030288696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,4,4,128,1,fp8,fp8,0,0.03776479959487915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,float16,0,0.03906559944152832
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,fp8,0,0.037215998768806456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,4,1,128,1,fp8,fp8,0,0.03718239963054657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,float16,0,0.039156800508499144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,fp8,0,0.03879840075969696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,float16,0,0.08026559948921204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,4,2,128,1,fp8,fp8,0,0.0389631986618042
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,float16,0,0.2585599899291992
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,fp8,0,0.3264496088027954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,4,1,128,1,fp8,fp8,0,0.32730560302734374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,float16,0,0.2672607898712158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,fp8,0,0.3280688047409058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,float16,0,0.04129120111465454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,float16,0,0.16793760061264038
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,fp8,0,0.1766991972923279
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,4,4,128,1,fp8,fp8,0,0.17635040283203124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,float16,0,0.14196959733963013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,float16,0,0.03943040072917938
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,fp8,0,0.17229119539260865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,4,1,128,1,fp8,fp8,0,0.17443039417266845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,float16,0,0.1461295962333679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,fp8,0,0.1740831971168518
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,float16,0,0.09654880166053773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,4,2,128,1,fp8,fp8,0,0.17464959621429443
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,fp8,0,0.09697759747505189
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,4,4,128,1,fp8,fp8,0,0.0980288028717041
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,float16,0,0.079995197057724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,fp8,0,0.09476959705352783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,4,1,128,1,fp8,fp8,0,0.09461280107498168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,float16,0,0.08357759714126586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,fp8,0,0.09487040042877197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,4,2,128,1,fp8,fp8,0,0.09634079933166503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,float16,0,0.0571008026599884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,fp8,0,0.05758559703826904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,4,4,128,1,fp8,fp8,0,0.057999998331069946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,fp8,0,0.05764639973640442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,4,1,128,1,fp8,fp8,0,0.05758240222930908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,float16,0,0.05103200078010559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,fp8,0,0.05794559717178345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,4,2,128,1,fp8,fp8,0,0.05827040076255798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,float16,0,0.03507519960403442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,fp8,0,0.036985599994659425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,4,4,128,1,fp8,fp8,0,0.03703039884567261
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,4,2,128,1,fp8,fp8,0,0.3280735969543457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,float16,0,0.03290719985961914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,fp8,0,0.0350928008556366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,4,1,128,1,fp8,fp8,0,0.03506079912185669
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,float16,0,0.033004799485206605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,fp8,0,0.03608959913253784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,4,2,128,1,fp8,fp8,0,0.03516480028629303
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,float16,0,0.03094559907913208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,fp8,0,0.03083840012550354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,4,4,128,1,fp8,fp8,0,0.030963200330734252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,float16,0,0.029164800047874452
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,fp8,0,0.030908799171447753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,4,1,128,1,fp8,fp8,0,0.030988800525665283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,float16,0,0.029233598709106447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,fp8,0,0.031009599566459656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,4,2,128,1,fp8,fp8,0,0.030904000997543334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,float16,0,0.02961600124835968
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,fp8,0,0.028915199637413024
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,4,4,128,1,fp8,fp8,0,0.02893120050430298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,float16,0,0.028984001278877257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,fp8,0,0.028799998760223388
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,4,1,128,1,fp8,fp8,0,0.02880159914493561
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,float16,0,0.029094401001930236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,fp8,0,0.028940799832344054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,float16,0,0.049558401107788086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,4,2,128,1,fp8,fp8,0,0.02885279953479767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,float16,0,0.17499200105667115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,fp8,0,0.22660479545593262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,4,1,128,1,fp8,fp8,0,0.22589120864868165
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,float16,0,0.1801103949546814
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,fp8,0,0.2283616065979004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,4,2,128,1,fp8,fp8,0,0.2279871940612793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,float16,0,0.11692960262298584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,fp8,0,0.12486399412155151
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,4,4,128,1,fp8,fp8,0,0.12482399940490722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,fp8,0,0.12099839448928833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,4,1,128,1,fp8,fp8,0,0.12098239660263062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,float16,0,0.09860799908638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,fp8,0,0.12216960191726685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,4,2,128,1,fp8,fp8,0,0.12154079675674438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,fp8,0,0.06949759721755981
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,4,4,128,1,fp8,fp8,0,0.06934400200843811
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,float16,0,0.05572959780693054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,fp8,0,0.06803200244903565
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,4,1,128,1,fp8,fp8,0,0.06895999908447266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,float16,0,0.057169598340988156
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,4,2,128,1,fp8,fp8,0,0.06819360256195069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,fp8,0,0.06936640143394471
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,float16,0,0.04117920100688934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,fp8,0,0.04332480132579804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,4,4,128,1,fp8,fp8,0,0.04322879910469055
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,float16,0,0.03730080127716064
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,fp8,0,0.04320639967918396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,float16,0,0.0370608001947403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,4,1,128,1,fp8,fp8,0,0.04329279959201813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,fp8,0,0.04323520064353943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,4,2,128,1,fp8,fp8,0,0.043268799781799316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,float16,0,0.02686080038547516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,fp8,0,0.028908801078796387
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,4,4,128,1,fp8,fp8,0,0.02887359857559204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,fp8,0,0.028838399052619933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,4,1,128,1,fp8,fp8,0,0.028857600688934327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,float16,0,0.026769599318504332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,fp8,0,0.028867200016975403
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,4,2,128,1,fp8,fp8,0,0.028809601068496705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,float16,0,0.09633600115776061
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,float16,0,0.024820800125598907
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,fp8,0,0.026734399795532226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,float16,0,0.024825599789619446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,fp8,0,0.024903999269008638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,4,1,128,1,fp8,fp8,0,0.02484479993581772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,float16,0,0.06848319768905639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,float16,0,0.024855999648571013
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,fp8,0,0.026796799898147584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,4,2,128,1,fp8,fp8,0,0.026707199215888978
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,fp8,0,0.024742400646209715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,4,4,128,1,fp8,fp8,0,0.02473440021276474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,float16,0,0.024676799774169922
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,fp8,0,0.024672000110149382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,4,1,128,1,fp8,fp8,0,0.02476000040769577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,float16,0,0.024675199389457704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,fp8,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,4,2,128,1,fp8,fp8,0,0.024728000164031982
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,float16,0,0.18829439878463744
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,fp8,0,0.26635839939117434
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,4,1,128,1,fp8,fp8,0,0.26621758937835693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,float16,0,0.19405280351638793
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,4,4,128,1,fp8,fp8,0,0.024740800261497498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,fp8,0,0.267523193359375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,4,2,128,1,fp8,fp8,0,0.26753599643707277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,float16,0,0.125547194480896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,float16,0,0.024769599735736846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,fp8,0,0.14308639764785766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,4,4,128,1,fp8,fp8,0,0.1424064040184021
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,float16,0,0.10125759840011597
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,fp8,0,0.13955199718475342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,4,1,128,1,fp8,fp8,0,0.13985439538955688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,float16,0,0.10469280481338501
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,fp8,0,0.14166079759597777
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,4,2,128,1,fp8,fp8,0,0.14089280366897583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,fp8,0,0.07809600234031677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,4,4,128,1,fp8,fp8,0,0.07809439897537232
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,float16,0,0.05752159953117371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,float16,0,0.02688640058040619
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,fp8,0,0.07602880001068116
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,4,1,128,1,fp8,fp8,0,0.07602080106735229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,fp8,0,0.07605760097503662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,4,2,128,1,fp8,fp8,0,0.07595199942588807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,float16,0,0.04254719913005829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,fp8,0,0.04526880085468292
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,4,4,128,1,fp8,fp8,0,0.045388799905776975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,float16,0,0.03699199855327606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,fp8,0,0.045239999890327454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,4,1,128,1,fp8,fp8,0,0.04521119892597199
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,float16,0,0.03604960143566131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,fp8,0,0.045239999890327454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,4,2,128,1,fp8,fp8,0,0.045219200849533084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,float16,0,0.024747200310230255
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,fp8,0,0.026833599805831908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,4,4,128,1,fp8,fp8,0,0.0268095999956131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,float16,0,0.022785599529743194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,fp8,0,0.02680320143699646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,4,1,128,1,fp8,fp8,0,0.026907199621200563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,float16,0,0.022707200050354003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,fp8,0,0.02688640058040619
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,4,2,128,1,fp8,fp8,0,0.02680320143699646
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,float16,0,0.020745599269866945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,fp8,0,0.022654399275779724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,4,4,128,1,fp8,fp8,0,0.022862400114536285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,float16,0,0.020641599595546723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,fp8,0,0.02279040068387985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,4,1,128,1,fp8,fp8,0,0.02269600033760071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,float16,0,0.0208064004778862
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,fp8,0,0.02282399982213974
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,4,2,128,1,fp8,fp8,0,0.022819200158119203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,float16,0,0.02075839936733246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,fp8,0,0.020715199410915375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,4,4,128,1,fp8,fp8,0,0.020633600652217865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,float16,0,0.020729599893093108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,fp8,0,0.02059520035982132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,4,1,128,1,fp8,fp8,0,0.021316799521446227
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,float16,0,0.020638400316238405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,fp8,0,0.020732800662517547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,4,2,128,1,fp8,fp8,0,0.020710399746894835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,float16,0,0.020606400072574617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,fp8,0,0.01863519996404648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,4,4,128,1,fp8,fp8,0,0.018771199882030486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,float16,0,0.02011840045452118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,fp8,0,0.018783999979496
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,4,1,128,1,fp8,fp8,0,0.018620799481868743
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,float16,0,0.019699199497699736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,fp8,0,0.018612800538539885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,4,2,128,1,fp8,fp8,0,0.018771199882030486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,float16,0,0.0723792016506195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,4,1,128,1,float16,float16,0,0.14964159727096557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,4,1,128,1,float16,fp8,0,0.23431520462036132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,4,1,128,1,fp8,fp8,0,0.2340240001678467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,4,2,128,1,float16,float16,0,0.1535647988319397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,4,2,128,1,float16,fp8,0,0.23564159870147705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,4,2,128,1,fp8,fp8,0,0.23519840240478515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,4,4,128,1,float16,fp8,0,0.12528959512710572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,4,4,128,1,fp8,fp8,0,0.12511680126190186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,float16,0,0.05980160236358643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,4,1,128,1,float16,float16,0,0.08254240155220031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,4,1,128,1,float16,fp8,0,0.12308160066604615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,4,2,128,1,float16,float16,0,0.08490239977836608
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,4,1,128,1,fp8,fp8,0,0.12307679653167725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,4,2,128,1,float16,fp8,0,0.12400480508804321
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,4,4,128,1,float16,float16,0,0.05965120196342468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,4,4,128,1,float16,fp8,0,0.06784480214118957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,4,4,128,1,fp8,fp8,0,0.06813920140266419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,4,1,128,1,float16,float16,0,0.04698080122470856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,4,1,128,1,float16,fp8,0,0.06621919870376587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,4,1,128,1,fp8,fp8,0,0.06640800237655639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,4,2,128,1,float16,float16,0,0.04941279888153076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,4,2,128,1,float16,fp8,0,0.06573119759559631
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,4,2,128,1,fp8,fp8,0,0.06694080233573914
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,4,4,128,1,float16,float16,0,0.0350383996963501
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,4,4,128,1,float16,fp8,0,0.039103999733924866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,4,4,128,1,fp8,fp8,0,0.03910079896450043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,4,1,128,1,float16,float16,0,0.028948798775672913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,4,1,128,1,float16,fp8,0,0.03909600079059601
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,4,1,128,1,fp8,fp8,0,0.03902080059051514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,4,2,128,1,float16,float16,0,0.028880000114440918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,4,2,128,1,float16,fp8,0,0.039105600118637084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,4,4,128,1,float16,float16,0,0.020660799741744996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,4,4,128,1,float16,fp8,0,0.02284960001707077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,4,4,128,1,fp8,fp8,0,0.02468319982290268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,4,1,128,1,float16,float16,0,0.018811200559139252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,4,4,128,1,float16,float16,0,0.10461119413375855
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,4,1,128,1,float16,fp8,0,0.02288320064544678
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,4,1,128,1,fp8,fp8,0,0.02268799990415573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,4,2,128,1,float16,float16,0,0.018646399676799773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,4,2,128,1,float16,fp8,0,0.024383999407291412
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,4,2,128,1,fp8,fp8,0,0.02452639937400818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,4,2,128,1,fp8,fp8,0,0.12411199808120728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,4,4,128,1,float16,float16,0,0.01656000018119812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,4,4,128,1,float16,fp8,0,0.01870879977941513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,4,1,128,1,float16,float16,0,0.016579200327396394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,4,1,128,1,float16,fp8,0,0.018801599740982056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,4,1,128,1,fp8,fp8,0,0.018769599497318268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,4,2,128,1,float16,float16,0,0.016663999855518342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,4,2,128,1,float16,fp8,0,0.018795199692249298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,4,2,128,1,fp8,fp8,0,0.018671999871730804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,4,4,128,1,float16,float16,0,0.016550399363040924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,4,2,128,1,fp8,fp8,0,0.03910239934921265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,4,4,128,1,float16,fp8,0,0.016630400717258454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,4,4,128,1,fp8,fp8,0,0.016568000614643096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,4,1,128,1,float16,float16,0,0.0164560005068779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,4,1,128,1,float16,fp8,0,0.01659200042486191
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,4,1,128,1,fp8,fp8,0,0.016697600483894348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,4,2,128,1,float16,float16,0,0.01586720049381256
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,4,2,128,1,float16,fp8,0,0.01648640036582947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,4,2,128,1,fp8,fp8,0,0.01660960018634796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,4,4,128,1,float16,float16,0,0.016359999775886536
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,4,4,128,1,float16,fp8,0,0.014681600034236908
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,4,4,128,1,fp8,fp8,0,0.014620800316333771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,4,1,128,1,float16,float16,0,0.014603200554847717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,4,1,128,1,float16,fp8,0,0.014627200365066529
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,4,1,128,1,fp8,fp8,0,0.014606399834156037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,4,2,128,1,float16,float16,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,4,2,128,1,float16,fp8,0,0.014611199498176575
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,4,2,128,1,fp8,fp8,0,0.014601600170135499
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,4,4,128,1,float16,float16,0,0.014528000354766845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,4,4,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,4,1,128,1,float16,float16,0,0.01459999978542328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,4,1,128,1,float16,fp8,0,0.014619199931621552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,4,1,128,1,fp8,fp8,0,0.014584000408649444
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,4,2,128,1,float16,float16,0,0.014696000516414643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,4,2,128,1,float16,fp8,0,0.01459839940071106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,4,4,128,1,fp8,fp8,0,0.018638400733470915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,4,2,128,1,fp8,fp8,0,0.014694400131702423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,4,1,128,1,float16,float16,0,0.06997920274734497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,4,1,128,1,float16,fp8,0,0.11303999423980712
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,4,1,128,1,fp8,fp8,0,0.11259039640426635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,4,2,128,1,float16,float16,0,0.0727504014968872
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,4,2,128,1,float16,fp8,0,0.11292799711227416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,4,2,128,1,fp8,fp8,0,0.11291999816894531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,4,4,128,1,float16,float16,0,0.05361440181732178
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,4,4,128,1,float16,fp8,0,0.06374080181121826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,4,4,128,1,fp8,fp8,0,0.0635424017906189
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,4,1,128,1,float16,float16,0,0.041116800904273984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,4,1,128,1,float16,fp8,0,0.061745601892471316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,4,1,128,1,fp8,fp8,0,0.061668801307678225
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,4,2,128,1,float16,float16,0,0.043163201212882994
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,4,2,128,1,float16,fp8,0,0.061598402261734006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,4,4,128,1,float16,float16,0,0.030899199843406677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,4,2,128,1,fp8,fp8,0,0.061887997388839724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,4,4,128,1,float16,fp8,0,0.035017600655555724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,4,4,128,1,fp8,fp8,0,0.03500480055809021
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,4,1,128,1,float16,float16,0,0.0247856006026268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,4,1,128,1,float16,fp8,0,0.03508319854736328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,4,1,128,1,fp8,fp8,0,0.03502880036830902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,4,2,128,1,float16,float16,0,0.02467840015888214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,4,2,128,1,float16,fp8,0,0.034959998726844785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,4,2,128,1,fp8,fp8,0,0.035016000270843506
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,4,4,128,1,float16,float16,0,0.018542400002479552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,4,4,128,1,float16,fp8,0,0.022331200540065765
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,4,4,128,1,fp8,fp8,0,0.020750400424003602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,4,1,128,1,float16,float16,0,0.01656160056591034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,4,1,128,1,float16,fp8,0,0.020777599513530733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,4,1,128,1,fp8,fp8,0,0.020742399990558623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,4,2,128,1,float16,fp8,0,0.020764799416065217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,4,4,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,4,2,128,1,fp8,fp8,0,0.020708799362182617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,4,4,128,1,float16,float16,0,0.014662399888038635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,4,4,128,1,fp8,fp8,0,0.016569599509239197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,4,1,128,1,float16,float16,0,0.014913600683212281
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,4,1,128,1,float16,fp8,0,0.016564799845218657
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,4,1,128,1,fp8,fp8,0,0.016705599427223206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,4,2,128,1,float16,float16,0,0.014497600495815277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,4,2,128,1,float16,fp8,0,0.01663679927587509
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,4,2,128,1,fp8,fp8,0,0.016550399363040924
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,4,4,128,1,float16,float16,0,0.014374400675296783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,4,4,128,1,float16,fp8,0,0.014528000354766845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,4,4,128,1,fp8,fp8,0,0.014457599818706512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,4,1,128,1,float16,float16,0,0.012479999661445617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,4,1,128,1,float16,fp8,0,0.01467519998550415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,4,1,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,4,2,128,1,float16,float16,0,0.014377599954605103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,4,2,128,1,float16,fp8,0,0.014496000111103058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,4,2,128,1,fp8,fp8,0,0.01462240070104599
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,4,4,128,1,float16,float16,0,0.012441600114107132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,4,4,128,1,float16,fp8,0,0.014393599331378936
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,4,4,128,1,fp8,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,4,1,128,1,float16,float16,0,0.01260959953069687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,4,1,128,1,float16,fp8,0,0.012449599802494049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,4,1,128,1,fp8,fp8,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,4,2,128,1,float16,float16,0,0.012467200309038163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,4,2,128,1,float16,fp8,0,0.012561599910259246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,4,2,128,1,fp8,fp8,0,0.012577599287033081
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,4,4,128,1,float16,float16,0,0.012620800733566284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,4,4,128,1,float16,fp8,0,0.01265760064125061
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,4,4,128,1,fp8,fp8,0,0.012670400738716125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,4,1,128,1,float16,float16,0,0.012608000636100769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,4,1,128,1,float16,fp8,0,0.012625600397586822
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,4,1,128,1,fp8,fp8,0,0.012542399764060973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,4,2,128,1,float16,float16,0,0.01268800050020218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,4,2,128,1,float16,fp8,0,0.012620800733566284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,4,2,128,1,fp8,fp8,0,0.012638400495052337
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,4,2,128,1,float16,float16,0,0.016484799981117248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,4,4,128,1,float16,float16,0,0.012591999769210816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,4,4,128,1,float16,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,4,4,128,1,float16,fp8,0,0.016568000614643096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,4,1,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,4,1,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,4,2,128,1,float16,float16,0,0.01242239996790886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,4,2,128,1,float16,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,4,2,128,1,fp8,fp8,0,0.012428800016641617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,4,1,128,1,float16,float16,0,0.043268799781799316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,4,1,128,1,float16,fp8,0,0.06379680037498474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,4,1,128,1,fp8,fp8,0,0.06369280219078063
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,4,2,128,1,float16,float16,0,0.04534400105476379
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,4,2,128,1,float16,fp8,0,0.06387519836425781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,4,2,128,1,fp8,fp8,0,0.06390560269355774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,4,4,128,1,float16,float16,0,0.03134399950504303
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,4,4,128,1,float16,fp8,0,0.037108799815177916
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,4,4,128,1,fp8,fp8,0,0.037057599425315856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,4,1,128,1,float16,float16,0,0.02682879865169525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,4,1,128,1,float16,fp8,0,0.037083199620246886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,4,1,128,1,fp8,fp8,0,0.037064000964164734
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,4,2,128,1,float16,float16,0,0.026807999610900878
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,4,2,128,1,float16,fp8,0,0.037187200784683225
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,4,2,128,1,fp8,fp8,0,0.03707680106163025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,4,4,128,1,fp8,fp8,0,0.012518399953842163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,4,4,128,1,float16,float16,0,0.01866080015897751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,4,1,128,1,float16,float16,0,0.01255200058221817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,4,4,128,1,fp8,fp8,0,0.02272160053253174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,4,4,128,1,float16,fp8,0,0.0227183997631073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,4,1,128,1,float16,float16,0,0.01672160029411316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,4,1,128,1,fp8,fp8,0,0.02272160053253174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,4,2,128,1,float16,float16,0,0.016607999801635742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,4,2,128,1,float16,fp8,0,0.022742399573326112
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,4,2,128,1,fp8,fp8,0,0.022776000201702118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,4,4,128,1,float16,float16,0,0.014404800534248353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,4,4,128,1,fp8,fp8,0,0.014665600657463074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,4,4,128,1,float16,fp8,0,0.014654399454593658
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,4,1,128,1,float16,float16,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,4,1,128,1,float16,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,4,1,128,1,fp8,fp8,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,4,2,128,1,float16,float16,0,0.012558400630950928
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,4,2,128,1,float16,fp8,0,0.014596800506114959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,4,2,128,1,fp8,fp8,0,0.014734399318695069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,4,4,128,1,float16,float16,0,0.012377600371837615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,4,4,128,1,float16,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,4,4,128,1,fp8,fp8,0,0.012593600153923034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,4,1,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,4,1,128,1,float16,fp8,0,0.012401600182056428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,4,1,128,1,fp8,fp8,0,0.012486399710178375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,4,2,128,1,fp8,fp8,0,0.012563200294971466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,4,2,128,1,float16,fp8,0,0.012449599802494049
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,4,4,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,4,4,128,1,float16,fp8,0,0.010623999685049058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,4,4,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,4,1,128,1,float16,float16,0,0.010604800283908844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,4,1,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,4,1,128,1,fp8,fp8,0,0.011488000303506852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,4,2,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,4,2,128,1,float16,fp8,0,0.011513599753379821
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,4,2,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,4,4,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,4,4,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,4,1,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,4,1,128,1,float16,fp8,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,4,1,128,1,fp8,fp8,0,0.010604800283908844
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,4,2,128,1,float16,float16,0,0.010659199953079224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,4,2,128,1,float16,fp8,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,4,2,128,1,fp8,fp8,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,4,4,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,4,1,128,1,float16,fp8,0,0.022728000581264497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,4,4,128,1,float16,fp8,0,0.010310400277376175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,4,4,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,4,1,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,4,1,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,4,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,4,2,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,4,2,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,4,4,128,1,float16,float16,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,4,4,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,4,4,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,4,1,128,1,float16,float16,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,4,1,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,4,2,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,4,2,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,4,2,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,4,2,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,4,1,128,1,float16,float16,0,0.03304960131645203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,4,1,128,1,float16,fp8,0,0.04326240122318268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,4,4,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,4,1,128,1,fp8,fp8,0,0.04318079948425293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,4,2,128,1,float16,float16,0,0.03305439949035645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,4,2,128,1,float16,fp8,0,0.04334399998188019
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,4,2,128,1,fp8,fp8,0,0.04325119853019714
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,4,4,128,1,float16,float16,0,0.023438400030136107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,4,4,128,1,float16,fp8,0,0.026888000965118408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,4,4,128,1,fp8,fp8,0,0.02675040066242218
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,4,1,128,1,float16,float16,0,0.02067999988794327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,4,1,128,1,float16,fp8,0,0.02680639922618866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,4,1,128,1,fp8,fp8,0,0.026804798841476442
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,4,2,128,1,float16,float16,0,0.02070080041885376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,4,2,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,4,2,128,1,float16,fp8,0,0.02672480046749115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,4,2,128,1,fp8,fp8,0,0.026846399903297423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,4,4,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,4,4,128,1,fp8,fp8,0,0.0166143998503685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,4,1,128,1,float16,float16,0,0.014606399834156037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,4,1,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,4,1,128,1,float16,fp8,0,0.016607999801635742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,4,2,128,1,float16,float16,0,0.014483200013637542
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,4,1,128,1,fp8,fp8,0,0.016734400391578676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,4,2,128,1,float16,fp8,0,0.01658399999141693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,4,2,128,1,fp8,fp8,0,0.01669600009918213
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,4,4,128,1,float16,float16,0,0.012457600235939026
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,4,4,128,1,float16,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,4,4,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,4,1,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,4,1,128,1,float16,fp8,0,0.01244639977812767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,4,1,128,1,fp8,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,4,2,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,4,2,128,1,fp8,fp8,0,0.01242239996790886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,4,4,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,4,4,128,1,float16,fp8,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,4,4,128,1,fp8,fp8,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,4,1,128,1,float16,float16,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,4,1,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,4,1,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,4,2,128,1,float16,float16,0,0.010585600137710571
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,4,2,128,1,float16,fp8,0,0.010673599690198899
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,4,2,128,1,fp8,fp8,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,4,4,128,1,float16,float16,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,4,4,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,4,4,128,1,float16,float16,0,0.016439999639987945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,4,1,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,4,1,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,4,1,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,4,2,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,4,2,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,4,2,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,4,4,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,4,4,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,4,4,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,4,1,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,4,1,128,1,float16,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,4,1,128,1,fp8,fp8,0,0.009303999692201614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,4,2,128,1,float16,float16,0,0.010740800201892853
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,4,2,128,1,float16,fp8,0,0.009880000352859497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,4,2,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,4,4,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,4,4,128,1,float16,fp8,0,0.009377600252628326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,4,4,128,1,fp8,fp8,0,0.00939520001411438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,4,1,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,4,1,128,1,float16,fp8,0,0.010311999917030334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,4,1,128,1,fp8,fp8,0,0.008703999966382981
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,4,2,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,4,2,128,1,float16,fp8,0,0.008947200328111648
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,4,2,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,4,4,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,4,4,128,1,float16,fp8,0,0.01034879982471466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,4,4,128,1,fp8,fp8,0,0.009758400171995163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,4,1,128,1,float16,float16,0,0.010199999809265137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,4,1,128,1,float16,fp8,0,0.009419199824333192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,4,1,128,1,fp8,fp8,0,0.0083856001496315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,4,2,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,4,2,128,1,float16,fp8,0,0.00878560021519661
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,4,1,128,1,float16,float16,0,0.028856000304222106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,4,1,128,1,float16,fp8,0,0.03295199871063233
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,4,1,128,1,fp8,fp8,0,0.03296160101890564
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,4,2,128,1,float16,float16,0,0.02895359992980957
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,4,2,128,1,float16,fp8,0,0.03299039900302887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,4,2,128,1,fp8,fp8,0,0.032913601398468016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,4,4,128,1,float16,float16,0,0.020691199600696562
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,4,4,128,1,float16,fp8,0,0.020683200657367708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,4,4,128,1,fp8,fp8,0,0.020905600488185884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,4,1,128,1,float16,float16,0,0.018662400543689728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,4,1,128,1,float16,fp8,0,0.020768000185489653
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,4,1,128,1,fp8,fp8,0,0.02080159932374954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,4,2,128,1,float16,float16,0,0.019412800669670105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,4,2,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,4,2,128,1,float16,fp8,0,0.020742399990558623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,4,2,128,1,fp8,fp8,0,0.02152799963951111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,4,4,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,4,4,128,1,float16,fp8,0,0.014571200311183929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,4,1,128,1,float16,float16,0,0.014446400105953217
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,4,1,128,1,float16,fp8,0,0.014691199362277984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,4,1,128,1,fp8,fp8,0,0.01446239948272705
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,4,2,128,1,float16,float16,0,0.014448000490665436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,4,2,128,1,float16,fp8,0,0.014593599736690522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,4,2,128,1,fp8,fp8,0,0.014644800126552582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,4,2,128,1,fp8,fp8,0,0.00846560001373291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,4,4,128,1,float16,float16,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,4,4,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,4,4,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,4,1,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,4,1,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,4,1,128,1,fp8,fp8,0,0.010598400235176086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,4,4,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,4,2,128,1,float16,float16,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,4,2,128,1,float16,fp8,0,0.010592000186443329
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,4,2,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,4,4,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,4,4,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,4,4,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,4,1,128,1,float16,float16,0,0.010320000350475311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,4,1,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,4,1,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,4,2,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,4,2,128,1,float16,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,4,2,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,4,4,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,4,4,128,1,float16,fp8,0,0.010315199941396713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,4,4,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,4,1,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,4,1,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,4,1,128,1,fp8,fp8,0,0.010291200131177902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,4,2,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,4,2,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,4,4,128,1,fp8,fp8,0,0.01451839953660965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,4,4,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,4,4,128,1,float16,fp8,0,0.008460800349712371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,4,1,128,1,float16,float16,0,0.008675199747085572
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,4,1,128,1,float16,fp8,0,0.008343999832868576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,4,1,128,1,fp8,fp8,0,0.008428800106048583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,4,2,128,1,float16,float16,0,0.00915839970111847
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,4,2,128,1,float16,fp8,0,0.009478399902582169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,4,2,128,1,fp8,fp8,0,0.009358400106430053
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,4,4,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,4,4,128,1,float16,fp8,0,0.009321600198745728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,4,4,128,1,fp8,fp8,0,0.008406399935483932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,4,1,128,1,float16,float16,0,0.008574400097131729
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,4,1,128,1,float16,fp8,0,0.008446399867534638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,4,1,128,1,fp8,fp8,0,0.008367999643087386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,4,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,4,2,128,1,float16,fp8,0,0.009441599994897843
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,4,2,128,1,fp8,fp8,0,0.009276799857616425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,4,4,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,4,4,128,1,float16,fp8,0,0.008680000156164169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,4,4,128,1,fp8,fp8,0,0.008540800213813782
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,4,1,128,1,float16,float16,0,0.010316800326108932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,4,1,128,1,float16,fp8,0,0.008448000252246856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,4,1,128,1,fp8,fp8,0,0.008448000252246856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,4,2,128,1,float16,fp8,0,0.010351999849081039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,4,2,128,1,float16,fp8,0,0.008619199693202972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,4,2,128,1,fp8,fp8,0,0.008500800281763077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,4,4,128,1,fp8,fp8,0,0.009438399970531464
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,4,1,128,1,float16,float16,0,0.027569600939750673
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,4,1,128,1,float16,fp8,0,0.028948798775672913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,4,1,128,1,fp8,fp8,0,0.028961598873138428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,4,2,128,1,float16,fp8,0,0.02889919877052307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,4,2,128,1,fp8,fp8,0,0.028940799832344054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,4,4,128,1,float16,float16,0,0.018812799453735353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,4,4,128,1,float16,fp8,0,0.0188960000872612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,4,4,128,1,fp8,fp8,0,0.018756799399852753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,4,1,128,1,float16,float16,0,0.018771199882030486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,4,1,128,1,float16,fp8,0,0.018665599822998046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,4,1,128,1,fp8,fp8,0,0.018750399351119995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,4,2,128,1,float16,float16,0,0.01876640021800995
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,4,2,128,1,float16,fp8,0,0.01881760060787201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,4,2,128,1,fp8,fp8,0,0.019198399782180787
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,4,4,128,1,float16,float16,0,0.014404800534248353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,4,4,128,1,float16,fp8,0,0.014484800398349762
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,4,4,128,1,fp8,fp8,0,0.012895999848842621
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,4,1,128,1,float16,float16,0,0.014451199769973755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,4,1,128,1,float16,fp8,0,0.014470399916172027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,4,2,128,1,float16,float16,0,0.012828800082206725
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,4,2,128,1,float16,fp8,0,0.014468799531459808
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,4,2,128,1,fp8,fp8,0,0.014500799775123595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,4,4,128,1,float16,float16,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,4,4,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,4,4,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,4,1,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,4,1,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,4,1,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,4,2,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,4,2,128,1,float16,float16,0,0.008363199979066848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,4,2,128,1,float16,fp8,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,4,2,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,4,4,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,4,4,128,1,float16,fp8,0,0.010121600329875946
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,4,4,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,4,1,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,4,1,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,4,2,128,1,float16,float16,0,0.02701599895954132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,4,2,128,1,float16,float16,0,0.010313600301742554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,4,2,128,1,float16,fp8,0,0.010326399654150008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,4,2,128,1,fp8,fp8,0,0.01032159999012947
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,4,4,128,1,float16,float16,0,0.00997759997844696
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,4,4,128,1,float16,fp8,0,0.009991999715566635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,4,4,128,1,fp8,fp8,0,0.008446399867534638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,4,1,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,4,1,128,1,float16,fp8,0,0.008422400057315826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,4,1,128,1,fp8,fp8,0,0.008472000062465668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,4,2,128,1,float16,float16,0,0.009478399902582169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,4,2,128,1,float16,fp8,0,0.009560000151395798
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,4,2,128,1,fp8,fp8,0,0.009612800180912017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,4,4,128,1,float16,float16,0,0.010353600233793258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,4,4,128,1,float16,fp8,0,0.009446399658918381
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,4,4,128,1,fp8,fp8,0,0.009097599983215332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,4,1,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,4,1,128,1,float16,fp8,0,0.008792000263929367
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,4,1,128,1,fp8,fp8,0,0.008799999952316284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,4,2,128,1,float16,float16,0,0.009105599671602248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,4,2,128,1,float16,fp8,0,0.00843520015478134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,4,2,128,1,fp8,fp8,0,0.00843520015478134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,4,4,128,1,float16,float16,0,0.0084927998483181
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,4,4,128,1,float16,fp8,0,0.008446399867534638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,4,4,128,1,fp8,fp8,0,0.008470399677753449
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,4,1,128,1,float16,float16,0,0.00846880003809929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,4,1,128,1,float16,fp8,0,0.00846719965338707
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,4,1,128,1,fp8,fp8,0,0.00859680026769638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,4,1,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,4,2,128,1,float16,float16,0,0.009675200283527374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,4,2,128,1,float16,fp8,0,0.008441600203514098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,4,2,128,1,fp8,fp8,0,0.008694399893283845
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,4,4,128,1,float16,float16,0,0.008449599891901017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,4,4,128,1,float16,fp8,0,0.00843840017914772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,4,4,128,1,fp8,fp8,0,0.008372800052165985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,4,1,128,1,float16,fp8,0,0.008380799740552902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,4,1,128,1,fp8,fp8,0,0.00840959995985031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,4,2,128,1,float16,float16,0,0.009542399644851684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,4,2,128,1,float16,fp8,0,0.008379200100898742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,4,2,128,1,fp8,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,0,0.026819199323654175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,4,1,128,1,fp8,fp8,0,0.014460800588130951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,0,0.02476799935102463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,0,0.02682879865169525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,4,1,128,1,fp8,fp8,0,0.02510559856891632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,0,0.024758400022983552
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,4,2,128,1,fp8,fp8,0,0.025391998887062072
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,0,0.018639999628067016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,0,0.018134400248527527
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,4,4,128,1,fp8,fp8,0,0.016627199947834015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,0,0.018529599905014037
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,0,0.016571199893951415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,4,1,128,1,fp8,fp8,0,0.016646400094032288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,0,0.018561600148677825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,0,0.016732800006866454
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,4,2,128,1,fp8,fp8,0,0.01668799966573715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,0,0.012742400169372559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,4,4,128,1,fp8,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,0,0.012566399574279786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,4,1,128,1,fp8,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,0,0.014451199769973755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,0,0.012425599992275238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,4,2,128,1,fp8,fp8,0,0.012615999579429627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,4,4,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,0,0.010353600233793258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,4,1,128,1,fp8,fp8,0,0.010292799770832061
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,4,2,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,0,0.010305599868297577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,4,1,128,1,float16,float16,0,0.00843840017914772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,4,4,128,1,fp8,fp8,0,0.008455999940633774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,4,1,128,1,fp8,fp8,0,0.009647999703884125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,4,2,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,0,0.008987200260162354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,4,4,128,1,fp8,fp8,0,0.00902559980750084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,0,0.01034879982471466
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,0,0.00841120034456253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,4,1,128,1,fp8,fp8,0,0.010284800082445145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,0,0.010235200077295304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,4,2,128,1,fp8,fp8,0,0.008446399867534638
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,0,0.00852160006761551
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,4,4,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,0,0.009524799883365631
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,4,1,128,1,fp8,fp8,0,0.008372800052165985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,0,0.010300800204277039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,0,0.008984000235795975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,0,0.008579199761152267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,4,4,128,1,fp8,fp8,0,0.008473599702119828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,0,0.009777600318193436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,0,0.008462399989366532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,4,1,128,1,fp8,fp8,0,0.008403199911117553
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,0,0.009188800305128097
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,0,0.01032480001449585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,4,2,128,1,fp8,fp8,0,0.008379200100898742
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,0,0.008390399813652038
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,0,0.008455999940633774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,4,4,128,1,fp8,fp8,0,0.008408000320196151
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,0,0.0083856001496315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,0,0.00835999995470047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,4,1,128,1,fp8,fp8,0,0.00843520015478134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,0,0.008803199976682663
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,0,0.008441600203514098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,4,2,128,1,fp8,fp8,0,0.00843840017914772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,float16,0,0.7102431774139404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,fp8,0,0.6811888217926025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,4,2,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,float16,0,0.4148687839508057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16384,2,1,128,1,fp8,fp8,0,0.6811552047729492
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,0,0.008551999926567078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,fp8,0,0.38674559593200686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,2,2,128,1,fp8,fp8,0,0.3853935956954956
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,float16,0,0.3954751968383789
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,fp8,0,0.38350400924682615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,2,1,128,1,fp8,fp8,0,0.3835551977157593
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,float16,0,0.24667680263519287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,2,2,128,1,fp8,fp8,0,0.2357072114944458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,float16,0,0.24603519439697266
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,2,1,128,1,fp8,fp8,0,0.2362351894378662
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,float16,0,0.15900800228118897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,fp8,0,0.15391839742660524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,2,2,128,1,fp8,fp8,0,0.15195200443267823
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,float16,0,0.1599552035331726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,fp8,0,0.15212639570236205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,2,1,128,1,fp8,fp8,0,0.1525823950767517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,float16,0,0.4297776222229004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,fp8,0,0.43504319190979
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,float16,0,0.2598720073699951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,12288,2,1,128,1,fp8,fp8,0,0.43478879928588865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,2,2,128,1,fp8,fp8,0,0.2502032041549683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,float16,0,0.24693760871887208
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,fp8,0,0.23775200843811034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,fp8,0,0.2500272035598755
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,fp8,0,0.23592801094055177
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,2,1,128,1,fp8,fp8,0,0.2503119945526123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,float16,0,0.15549440383911134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,2,2,128,1,fp8,fp8,0,0.15782079696655274
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,fp8,0,0.15590399503707886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,float16,0,0.15498559474945067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,fp8,0,0.15588639974594115
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,2,1,128,1,fp8,fp8,0,0.15583839416503906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,float16,0,0.12106879949569702
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,fp8,0,0.1189520001411438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,2,2,128,1,fp8,fp8,0,0.11705440282821655
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,float16,0,0.12108319997787476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,fp8,0,0.11694400310516358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,2,1,128,1,fp8,fp8,0,0.1171231985092163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,float16,0,0.3176016092300415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,fp8,0,0.3312112092971802
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,float16,0,0.1949615955352783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,fp8,0,0.1987920045852661
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,2,2,128,1,fp8,fp8,0,0.19689760208129883
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,float16,0,0.19315199851989745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,fp8,0,0.19675040245056152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,2,1,128,1,fp8,fp8,0,0.1974079966545105
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,float16,0,0.11615519523620606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,fp8,0,0.11909600496292114
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,fp8,0,0.25192320346832275
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,float16,0,0.11683039665222168
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,fp8,0,0.11892960071563721
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,2,1,128,1,fp8,fp8,0,0.11895359754562378
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,float16,0,0.10268640518188477
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,fp8,0,0.10062079429626465
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,2,2,128,1,fp8,fp8,0,0.10056320428848267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,float16,0,0.1026047945022583
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,fp8,0,0.10056639909744262
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,2,1,128,1,fp8,fp8,0,0.10054880380630493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,float16,0,0.39542880058288576
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,fp8,0,0.4348624229431152
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,8192,2,1,128,1,fp8,fp8,0,0.4332592010498047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,float16,0,0.23745279312133788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,10240,2,1,128,1,fp8,fp8,0,0.33085439205169676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,2,2,128,1,fp8,fp8,0,0.244598388671875
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,float16,0,0.22437601089477538
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,fp8,0,0.24198079109191895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,float16,0,0.1411311984062195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,2,1,128,1,fp8,fp8,0,0.23985280990600585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,fp8,0,0.14698560237884523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,2,2,128,1,fp8,fp8,0,0.14654239416122436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,float16,0,0.13788000345230103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,fp8,0,0.14567359685897827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,2,2,128,1,fp8,fp8,0,0.11896159648895263
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,2,1,128,1,fp8,fp8,0,0.14578880071640016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,fp8,0,0.09235360026359558
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,2,2,128,1,fp8,fp8,0,0.09246399998664856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,float16,0,0.08839840292930604
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,fp8,0,0.09225119948387146
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,float16,0,0.0843392014503479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,fp8,0,0.08220160007476807
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,2,2,128,1,fp8,fp8,0,0.08215680122375488
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,float16,0,0.08421440124511718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,fp8,0,0.08213599920272827
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,2,1,128,1,fp8,fp8,0,0.08228639960289001
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,float16,0,0.2548975944519043
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,fp8,0,0.24284799098968507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,fp8,0,0.28696160316467284
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,6144,2,1,128,1,fp8,fp8,0,0.28748159408569335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,float16,0,0.15177760124206544
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,fp8,0,0.1625983953475952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,2,2,128,1,fp8,fp8,0,0.16208800077438354
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,2,1,128,1,fp8,fp8,0,0.09237759709358215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,float16,0,0.14049760103225709
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,fp8,0,0.16186239719390869
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,2,1,128,1,fp8,fp8,0,0.16203999519348145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,float16,0,0.0903056025505066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,fp8,0,0.09992640018463135
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,2,2,128,1,fp8,fp8,0,0.10041279792785644
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,float16,0,0.09041759967803956
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,fp8,0,0.10038080215454101
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,2,1,128,1,fp8,fp8,0,0.10054399967193603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,fp8,0,0.0719871997833252
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,float16,0,0.06780800223350525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,2,2,128,1,fp8,fp8,0,0.07187839746475219
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,float16,0,0.06788960099220276
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,fp8,0,0.07191680073738098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,2,1,128,1,fp8,fp8,0,0.0718384027481079
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,float16,0,0.06580479741096497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,fp8,0,0.06455519795417786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,float16,0,0.06577119827270508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,2,2,128,1,fp8,fp8,0,0.06569120287895203
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,fp8,0,0.063755202293396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,2,1,128,1,fp8,fp8,0,0.06557919979095458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,float16,0,0.2397536039352417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,fp8,0,0.30512640476226804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,float16,0,0.14308639764785766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,4096,2,1,128,1,fp8,fp8,0,0.3057087898254395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,float16,0,0.08827999830245972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,fp8,0,0.16854079961776733
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,2,2,128,1,fp8,fp8,0,0.16900320053100587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,float16,0,0.13470560312271118
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,float16,0,0.08415039777755737
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,2,1,128,1,fp8,fp8,0,0.1661903977394104
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,fp8,0,0.09846240282058716
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,2,2,128,1,fp8,fp8,0,0.0985040009021759
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,float16,0,0.08212800025939941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,fp8,0,0.09850559830665588
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,2,1,128,1,fp8,fp8,0,0.09847360253334045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,float16,0,0.05151839852333069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,fp8,0,0.05955039858818054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,2,2,128,1,fp8,fp8,0,0.05968959927558899
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,fp8,0,0.05964159965515137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,float16,0,0.05137280225753784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,float16,0,0.04935519993305206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,fp8,0,0.05134879946708679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,float16,0,0.04932000041007996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,2,2,128,1,fp8,fp8,0,0.05146080255508423
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,fp8,0,0.05137760043144226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,2,1,128,1,fp8,fp8,0,0.05136640071868896
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,float16,0,0.0472896009683609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,fp8,0,0.04731839895248413
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,2,2,128,1,fp8,fp8,0,0.047198399901390076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,float16,0,0.047276800870895384
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,fp8,0,0.04726400077342987
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,2,1,128,1,fp8,fp8,0,0.047224000096321106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,float16,0,0.15852799415588378
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,fp8,0,0.2109679937362671
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,float16,0,0.0947759985923767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,3072,2,1,128,1,fp8,fp8,0,0.2095263957977295
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,fp8,0,0.11697280406951904
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,2,2,128,1,fp8,fp8,0,0.11706880331039429
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,float16,0,0.08818560242652893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,fp8,0,0.11607199907302856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,float16,0,0.057550400495529175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,fp8,0,0.16614880561828613
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,fp8,0,0.06988319754600525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,2,2,128,1,fp8,fp8,0,0.06997759938240052
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,fp8,0,0.0699567973613739
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,2,1,128,1,fp8,fp8,0,0.0699184000492096
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,float16,0,0.04134719967842102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,fp8,0,0.04741120040416717
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,2,2,128,1,fp8,fp8,0,0.04738239943981171
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,2,1,128,1,fp8,fp8,0,0.060971200466156006
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,float16,0,0.04117920100688934
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,fp8,0,0.0473904013633728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,2,1,128,1,fp8,fp8,0,0.04747200012207031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,float16,0,0.03915359973907471
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,fp8,0,0.04120959937572479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,2,2,128,1,fp8,fp8,0,0.04120000004768372
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,float16,0,0.03912799954414368
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,fp8,0,0.041124799847602846
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,2,1,128,1,fp8,fp8,0,0.041284799575805664
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,float16,0,0.038950398564338684
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,fp8,0,0.037145599722862244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,2,2,128,1,fp8,fp8,0,0.03717440068721771
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,float16,0,0.0390608012676239
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,fp8,0,0.03714239895343781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,2,1,128,1,fp8,fp8,0,0.03711999952793121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,float16,0,0.16353280544281007
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,fp8,0,0.2397183895111084
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,2048,2,1,128,1,fp8,fp8,0,0.24045279026031494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,float16,0,0.09660320281982422
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,fp8,0,0.13133440017700196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,2,1,128,1,fp8,fp8,0,0.11616159677505493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,float16,0,0.0922927975654602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,fp8,0,0.12807199954986573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,float16,0,0.057051199674606326
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,2,1,128,1,fp8,fp8,0,0.12808640003204347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,float16,0,0.05587040185928345
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,fp8,0,0.0742031991481781
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,2,2,128,1,fp8,fp8,0,0.07477279901504516
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,float16,0,0.05438399910926819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,fp8,0,0.07401760220527649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,float16,0,0.034862399101257324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,fp8,0,0.043191999197006226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,2,2,128,1,fp8,fp8,0,0.04318079948425293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,float16,0,0.03297599852085113
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,fp8,0,0.043166399002075195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,2,1,128,1,fp8,fp8,0,0.04321120083332062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,float16,0,0.030900800228118898
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,fp8,0,0.03500159978866577
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,2,2,128,1,fp8,fp8,0,0.03499360084533691
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,float16,0,0.030907198786735535
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,fp8,0,0.03494560122489929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,2,1,128,1,fp8,fp8,0,0.03506560027599335
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,float16,0,0.028995200991630554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,2,2,128,1,fp8,fp8,0,0.13072320222854614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,2,2,128,1,fp8,fp8,0,0.030859199166297913
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,float16,0,0.02916960120201111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,fp8,0,0.030931198596954347
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,2,1,128,1,fp8,fp8,0,0.030852800607681273
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,float16,0,0.028984001278877257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,fp8,0,0.02895520031452179
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,2,2,128,1,fp8,fp8,0,0.028870400786399842
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,2,1,128,1,fp8,fp8,0,0.0738976001739502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,float16,0,0.028944000601768494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,fp8,0,0.028944000601768494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,2,1,128,1,fp8,fp8,0,0.028857600688934327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,float16,0,0.11081440448760986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,fp8,0,0.17051199674606324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1536,2,1,128,1,fp8,fp8,0,0.17189760208129884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,fp8,0,0.09441279768943786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,2,2,128,1,fp8,fp8,0,0.0945360004901886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,float16,0,0.06371679902076721
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,fp8,0,0.09419040083885193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,2,1,128,1,fp8,fp8,0,0.09431359767913819
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,float16,0,0.04115360081195831
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,fp8,0,0.05558879971504212
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,2,2,128,1,fp8,fp8,0,0.05548959970474243
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,float16,0,0.04121440052986145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,fp8,0,0.05556480288505554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,2,1,128,1,fp8,fp8,0,0.05550720095634461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,float16,0,0.028774398565292358
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,fp8,0,0.03510879874229431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,2,2,128,1,fp8,fp8,0,0.035017600655555724
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,float16,0,0.02884480059146881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,fp8,0,0.03503040075302124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,2,1,128,1,fp8,fp8,0,0.03506079912185669
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,float16,0,0.02507840096950531
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,fp8,0,0.028857600688934327
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,2,2,128,1,fp8,fp8,0,0.02885279953479767
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,float16,0,0.02670240104198456
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,fp8,0,0.028833600878715514
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,2,1,128,1,fp8,fp8,0,0.028969600796699524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,float16,0,0.02476799935102463
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,2,2,128,1,fp8,fp8,0,0.024804799258708952
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,fp8,0,0.025707200169563293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,fp8,0,0.024988800287246704
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,float16,0,0.024777600169181825
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,2,1,128,1,fp8,fp8,0,0.025364801287651062
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,float16,0,0.024563199281692503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,fp8,0,0.024695999920368195
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,2,2,128,1,fp8,fp8,0,0.022870400547981264
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,float16,0,0.024684800207614897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,fp8,0,0.02274080067873001
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,2,1,128,1,fp8,fp8,0,0.024663999676704407
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,float16,0,0.12078239917755126
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,float16,0,0.06776319742202759
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,fp8,0,0.2070607900619507
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1024,2,1,128,1,fp8,fp8,0,0.2071199893951416
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,float16,0,0.07197120189666747
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,fp8,0,0.11146559715270996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,float16,0,0.06785600185394287
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,fp8,0,0.1090656042098999
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,2,1,128,1,fp8,fp8,0,0.10882240533828735
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,float16,0,0.04225760102272034
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,fp8,0,0.0616703987121582
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,2,2,128,1,fp8,fp8,0,0.06167680025100708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,float16,0,0.04119200110435486
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,fp8,0,0.06169120073318481
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,2,1,128,1,fp8,fp8,0,0.06167680025100708
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,float16,0,0.02486239969730377
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,fp8,0,0.03645119965076447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,2,2,128,1,fp8,fp8,0,0.03694719970226288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,float16,0,0.024835200607776643
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,fp8,0,0.03087199926376343
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,fp8,0,0.037001600861549376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,2,1,128,1,fp8,fp8,0,0.03577919900417328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,float16,0,0.022678400576114654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,2,2,128,1,fp8,fp8,0,0.026927998661994933
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,float16,0,0.0227183997631073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,fp8,0,0.02680160105228424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,2,1,128,1,fp8,fp8,0,0.026819199323654175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,fp8,0,0.02274399995803833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,2,2,128,1,fp8,fp8,0,0.022752000391483305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,float16,0,0.020715199410915375
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,fp8,0,0.022755199670791627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,2,1,128,1,fp8,fp8,0,0.02274879962205887
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,float16,0,0.020638400316238405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,fp8,0,0.020524799823760986
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,2,2,128,1,fp8,fp8,0,0.020795199275016784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,float16,0,0.02056960016489029
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,fp8,0,0.020692799985408784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,2,1,128,1,fp8,fp8,0,0.020603199303150178
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,float16,0,0.01984799951314926
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,fp8,0,0.018824000656604768
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,2,2,128,1,fp8,fp8,0,0.018748800456523895
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,float16,0,0.018807999789714813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,fp8,0,0.018756799399852753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,2,1,128,1,fp8,fp8,0,0.01873439997434616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,2,1,128,1,float16,float16,0,0.10052319765090942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,2,1,128,1,float16,fp8,0,0.18916159868240356
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,fp8,0,0.02690559923648834
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,512,2,1,128,1,fp8,fp8,0,0.18894879817962645
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,2,2,128,1,float16,float16,0,0.05978239774703979
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,2,2,128,1,float16,fp8,0,0.1005519986152649
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,2,2,128,1,fp8,fp8,0,0.10141119956970215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,2,1,128,1,float16,float16,0,0.05691360235214234
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,2,1,128,1,float16,fp8,0,0.10037280321121216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,2,1,128,1,fp8,fp8,0,0.09928960204124451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,2,2,128,1,float16,float16,0,0.03517920076847077
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,2,2,128,1,float16,fp8,0,0.055606400966644286
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,2,2,128,1,fp8,fp8,0,0.05551999807357788
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,2,1,128,1,float16,float16,0,0.033025598526000975
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,2,1,128,1,float16,fp8,0,0.05548319816589355
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,2,1,128,1,fp8,fp8,0,0.055446398258209226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,2,2,128,1,float16,fp8,0,0.03099839985370636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,2,2,128,1,fp8,fp8,0,0.03109920024871826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,2,1,128,1,float16,float16,0,0.0208064004778862
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,2,1,128,1,float16,fp8,0,0.03115360140800476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,2,2,128,1,fp8,fp8,0,0.11085920333862305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,2,1,128,1,fp8,fp8,0,0.030990400910377504
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,2,2,128,1,float16,float16,0,0.016638399660587312
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,2,2,128,1,float16,fp8,0,0.022860799729824067
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,2,2,128,1,fp8,fp8,0,0.022724799811840057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,2,1,128,1,float16,float16,0,0.0181536003947258
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,2,1,128,1,float16,fp8,0,0.022699199616909027
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,2,1,128,1,fp8,fp8,0,0.022737599909305573
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,2,2,128,1,float16,float16,0,0.016499200463294984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,2,2,128,1,float16,fp8,0,0.018662400543689728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,2,2,128,1,fp8,fp8,0,0.01860000044107437
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,2,1,128,1,float16,float16,0,0.016540800034999848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,2,1,128,1,float16,fp8,0,0.018593600392341612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,2,1,128,1,fp8,fp8,0,0.018694399297237395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,2,2,128,1,float16,float16,0,0.014635199308395385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,2,2,128,1,float16,fp8,0,0.016652800142765045
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,2,2,128,1,fp8,fp8,0,0.01659359931945801
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,2,1,128,1,float16,fp8,0,0.01664319932460785
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,2,1,128,1,float16,float16,0,0.01478240042924881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,2,1,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,2,2,128,1,float16,float16,0,0.0146479994058609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,2,2,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,2,1,128,1,float16,float16,0,0.01465120017528534
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,2,2,128,1,fp8,fp8,0,0.014628799259662628
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,2,1,128,1,float16,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,2,1,128,1,fp8,fp8,0,0.014654399454593658
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,2,2,128,1,float16,fp8,0,0.014511999487876893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,2,2,128,1,fp8,fp8,0,0.014596800506114959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,2,1,128,1,float16,float16,0,0.014609600603580474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,2,1,128,1,float16,fp8,0,0.014708800613880158
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,2,1,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,2,1,128,1,float16,float16,0,0.05137760043144226
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,2,1,128,1,float16,fp8,0,0.09439520239830017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,256,2,1,128,1,fp8,fp8,0,0.09438080191612244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,2,2,128,1,float16,float16,0,0.028923198580741882
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,2,2,128,1,float16,fp8,0,0.05137280225753784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,2,2,128,1,fp8,fp8,0,0.051367998123168945
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,2,2,128,1,float16,float16,0,0.020793600380420683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,2,1,128,1,float16,float16,0,0.028787198662757873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,2,1,128,1,float16,fp8,0,0.0513264000415802
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,2,2,128,1,float16,float16,0,0.018673600256443025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,2,2,128,1,float16,fp8,0,0.028916800022125246
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,2,2,128,1,fp8,fp8,0,0.028993600606918336
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,2,1,128,1,float16,float16,0,0.018588800728321076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,2,1,128,1,float16,fp8,0,0.028999999165534973
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,2,1,128,1,fp8,fp8,0,0.02884640097618103
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,2,2,128,1,float16,float16,0,0.014591999351978302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,2,2,128,1,float16,fp8,0,0.020635199546813966
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,2,2,128,1,fp8,fp8,0,0.020844799280166627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,2,1,128,1,float16,float16,0,0.014502400159835815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,2,1,128,1,float16,fp8,0,0.02083359956741333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,2,1,128,1,fp8,fp8,0,0.02064799964427948
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,2,2,128,1,float16,float16,0,0.014444799721240997
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,2,2,128,1,float16,fp8,0,0.016524800658226015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,2,2,128,1,fp8,fp8,0,0.016680000722408293
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,2,1,128,1,float16,float16,0,0.014555199444293976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,2,1,128,1,float16,fp8,0,0.01669279932975769
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,2,1,128,1,fp8,fp8,0,0.016603200137615202
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,2,2,128,1,float16,float16,0,0.014351999759674073
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,2,2,128,1,float16,fp8,0,0.01449279934167862
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,2,2,128,1,fp8,fp8,0,0.014591999351978302
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,2,1,128,1,float16,float16,0,0.012492799758911132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,2,1,128,1,float16,fp8,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,2,1,128,1,fp8,fp8,0,0.014523200690746307
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,2,2,128,1,float16,float16,0,0.012547199428081513
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,2,2,128,1,float16,fp8,0,0.014137600362300873
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,2,2,128,1,fp8,fp8,0,0.013601599633693695
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,2,1,128,1,float16,float16,0,0.012606400251388549
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,2,2,128,1,float16,float16,0,0.01451359987258911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,2,1,128,1,float16,fp8,0,0.01257600039243698
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,2,1,128,1,fp8,fp8,0,0.01255200058221817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,2,2,128,1,float16,float16,0,0.012503999471664428
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,2,2,128,1,float16,fp8,0,0.012532800436019897
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,2,2,128,1,fp8,fp8,0,0.012591999769210816
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,2,1,128,1,float16,float16,0,0.012535999715328216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,2,1,128,1,float16,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,2,1,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,2,2,128,1,float16,float16,0,0.012564800679683685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,2,2,128,1,float16,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,2,2,128,1,fp8,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,2,1,128,1,float16,float16,0,0.012521600723266602
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,2,1,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,2,1,128,1,fp8,fp8,0,0.012441600114107132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,2,1,128,1,float16,float16,0,0.031033599376678468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,2,1,128,1,float16,fp8,0,0.05342559814453125
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,2,1,128,1,fp8,fp8,0,0.05145599842071533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,128,2,1,128,1,fp8,fp8,0,0.053502398729324344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,2,2,128,1,float16,float16,0,0.01857600063085556
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,2,2,128,1,float16,fp8,0,0.030904000997543334
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,2,2,128,1,fp8,fp8,0,0.030980798602104186
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,2,1,128,1,float16,float16,0,0.018667200207710268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,2,1,128,1,float16,fp8,0,0.030905601382255555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,2,1,128,1,fp8,fp8,0,0.030943998694419862
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,2,2,128,1,float16,fp8,0,0.018742400407791137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,2,2,128,1,fp8,fp8,0,0.018785600364208222
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,2,1,128,1,float16,float16,0,0.014448000490665436
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,2,1,128,1,float16,fp8,0,0.01876160055398941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,2,1,128,1,fp8,fp8,0,0.018764799833297728
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,2,2,128,1,float16,float16,0,0.012432000041007996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,2,2,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,2,2,128,1,fp8,fp8,0,0.014542399346828461
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,2,1,128,1,float16,float16,0,0.012383999675512314
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,2,1,128,1,float16,fp8,0,0.014662399888038635
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,2,1,128,1,fp8,fp8,0,0.014504000544548035
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,2,2,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,2,2,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,2,2,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,2,1,128,1,float16,float16,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,2,1,128,1,float16,fp8,0,0.01233920007944107
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,2,1,128,1,fp8,fp8,0,0.012433599680662155
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,2,2,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,2,2,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,2,2,128,1,fp8,fp8,0,0.010598400235176086
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,2,1,128,1,float16,float16,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,2,1,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,2,1,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,2,2,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,2,2,128,1,fp8,fp8,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,2,2,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,2,1,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,2,1,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,2,1,128,1,fp8,fp8,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,2,2,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,2,2,128,1,float16,fp8,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,2,1,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,2,1,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,2,1,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,2,2,128,1,float16,float16,0,0.011072000116109848
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,2,2,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,2,2,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,2,1,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,2,1,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,2,2,128,1,float16,float16,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,2,1,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,2,1,128,1,float16,float16,0,0.022732800245285033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,2,1,128,1,float16,fp8,0,0.03504959940910339
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,2,2,128,1,float16,float16,0,0.016575999557971954
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,2,2,128,1,float16,fp8,0,0.020751999318599702
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,2,2,128,1,fp8,fp8,0,0.02064639925956726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,2,1,128,1,float16,float16,0,0.01658560037612915
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,2,1,128,1,float16,fp8,0,0.020619200170040132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,2,1,128,1,fp8,fp8,0,0.02078240066766739
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,2,2,128,1,float16,float16,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,2,2,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,2,2,128,1,fp8,fp8,0,0.014555199444293976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,2,1,128,1,float16,float16,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,2,1,128,1,float16,fp8,0,0.014480000734329224
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,2,1,128,1,fp8,fp8,0,0.01451359987258911
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,2,2,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,2,2,128,1,float16,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,2,2,128,1,fp8,fp8,0,0.012507200241088867
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,2,1,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,2,1,128,1,float16,fp8,0,0.01255200058221817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,2,2,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,2,2,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,2,2,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,2,2,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,2,1,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,2,1,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,2,1,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,2,2,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,2,2,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,2,2,128,1,fp8,fp8,0,0.010331200063228607
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,2,1,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,2,1,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,2,1,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,2,2,128,1,float16,float16,0,0.010326399654150008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,2,2,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,2,2,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,64,2,1,128,1,fp8,fp8,0,0.035097599029541016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,2,1,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,2,1,128,1,fp8,fp8,0,0.010358399897813796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,2,2,128,1,float16,float16,0,0.010351999849081039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,2,2,128,1,float16,fp8,0,0.01035040020942688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,2,2,128,1,fp8,fp8,0,0.010284800082445145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,2,1,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,2,1,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,2,1,128,1,fp8,fp8,0,0.010342399775981902
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,2,2,128,1,float16,float16,0,0.010315199941396713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,2,2,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,2,2,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,2,1,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,2,1,128,1,fp8,fp8,0,0.012430399656295776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,2,1,128,1,float16,fp8,0,0.010335999727249145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,2,1,128,1,fp8,fp8,0,0.009355200082063675
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,2,1,128,1,float16,float16,0,0.020742399990558623
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,2,1,128,1,float16,fp8,0,0.024772800505161285
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,32,2,1,128,1,fp8,fp8,0,0.02484000027179718
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,2,2,128,1,float16,float16,0,0.01454080045223236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,2,2,128,1,float16,fp8,0,0.016756799817085267
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,2,2,128,1,fp8,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,2,1,128,1,float16,float16,0,0.014643199741840363
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,2,1,128,1,float16,fp8,0,0.016569599509239197
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,2,1,128,1,fp8,fp8,0,0.01666239947080612
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,2,2,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,2,2,128,1,float16,fp8,0,0.012412799894809723
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,2,2,128,1,fp8,fp8,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,2,1,128,1,float16,float16,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,2,1,128,1,float16,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,2,1,128,1,fp8,fp8,0,0.012368000298738479
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,2,2,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,2,2,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,2,2,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,2,1,128,1,float16,float16,0,0.010332799702882766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,2,1,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,2,1,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,2,2,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,2,2,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,2,2,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,2,1,128,1,float16,float16,0,0.010360000282526016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,2,1,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,2,1,128,1,fp8,fp8,0,0.010310400277376175
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,2,2,128,1,float16,float16,0,0.009350399672985076
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,2,2,128,1,float16,fp8,0,0.010335999727249145
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,2,2,128,1,fp8,fp8,0,0.008483199775218964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,2,1,128,1,float16,float16,0,0.010313600301742554
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,2,1,128,1,float16,fp8,0,0.008401600271463394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,2,1,128,1,fp8,fp8,0,0.009267199784517288
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,2,2,128,1,float16,float16,0,0.00846880003809929
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,2,2,128,1,float16,fp8,0,0.009489600360393525
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,2,2,128,1,fp8,fp8,0,0.008460800349712371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,2,1,128,1,float16,float16,0,0.010288000106811523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,2,1,128,1,float16,fp8,0,0.008441600203514098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,2,1,128,1,fp8,fp8,0,0.008819200098514557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,2,2,128,1,float16,fp8,0,0.008432000130414962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,2,2,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,2,1,128,1,float16,float16,0,0.009808000177145004
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,2,1,128,1,float16,fp8,0,0.008743999898433686
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,2,1,128,1,fp8,fp8,0,0.008423999696969987
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,2,2,128,1,float16,fp8,0,0.00843520015478134
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,2,2,128,1,fp8,fp8,0,0.00846560001373291
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,2,1,128,1,float16,float16,0,0.008489599823951722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,2,1,128,1,float16,fp8,0,0.0083856001496315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,2,1,128,1,fp8,fp8,0,0.008388800173997879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,2,1,128,1,float16,float16,0,0.018692800402641298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,2,1,128,1,float16,fp8,0,0.020644800364971162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,16,2,1,128,1,fp8,fp8,0,0.020739200711250304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,2,2,128,1,float16,float16,0,0.012943999469280243
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,2,2,128,1,float16,fp8,0,0.014535999298095703
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,2,2,128,1,fp8,fp8,0,0.01446399986743927
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,2,1,128,1,float16,float16,0,0.012518399953842163
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,2,1,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,2,1,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,2,2,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,2,1,128,1,fp8,fp8,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,2,2,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,2,2,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,2,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,2,1,128,1,float16,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,2,1,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,2,2,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,2,2,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,2,2,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,2,1,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,2,1,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,2,1,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,2,2,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,2,2,128,1,float16,fp8,0,0.008436799794435502
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,2,2,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,2,1,128,1,float16,float16,0,0.010195200145244599
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,2,1,128,1,float16,fp8,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,2,2,128,1,fp8,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,2,1,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,2,2,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,2,2,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,2,2,128,1,fp8,fp8,0,0.009390400350093841
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,2,1,128,1,float16,float16,0,0.010264000296592713
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,2,1,128,1,float16,fp8,0,0.009217599779367447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,2,2,128,1,float16,float16,0,0.00888959988951683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,2,1,128,1,fp8,fp8,0,0.010283199697732925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,2,2,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,2,2,128,1,float16,fp8,0,0.009140799939632415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,2,2,128,1,fp8,fp8,0,0.010292799770832061
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,2,1,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,2,1,128,1,float16,fp8,0,0.008683200180530547
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,2,1,128,1,fp8,fp8,0,0.010339199751615524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,2,2,128,1,float16,float16,0,0.008376000076532364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,2,2,128,1,float16,fp8,0,0.008454400300979614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,2,2,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,2,1,128,1,float16,float16,0,0.008476799726486206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,2,1,128,1,float16,fp8,0,0.008376000076532364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,2,1,128,1,fp8,fp8,0,0.009987200051546097
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,2,2,128,1,float16,float16,0,0.008454400300979614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,2,2,128,1,float16,fp8,0,0.008377599716186523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,2,2,128,1,fp8,fp8,0,0.008534400165081025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,2,1,128,1,float16,float16,0,0.00844319984316826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,2,1,128,1,float16,fp8,0,0.008377599716186523
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,0,0.018607999384403228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,0,0.016657599806785585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,256,1,2,1,128,1,fp8,fp8,0,0.016598400473594666
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,0,0.012572799623012543
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,0,0.012479999661445617
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,2,2,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,0,0.012465599924325943
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,2,1,128,1,fp8,fp8,0,0.012438400089740754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,2,2,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,2,1,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,2,2,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,2,1,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,0,0.009297599643468856
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,2,2,128,1,fp8,fp8,0,0.009052799642086029
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,0,0.009880000352859497
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,0,0.008662399649620057
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,2,1,128,1,fp8,fp8,0,0.008452799916267396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,0,0.008374399691820144
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,0,0.008472000062465668
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,2,2,128,1,fp8,fp8,0,0.0083856001496315
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,0,0.009273599833250046
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,0,0.008396799862384795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,2,1,128,1,fp8,fp8,0,0.008505599945783615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,2,1,128,1,fp8,fp8,0,0.00835999995470047
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,0,0.01019359976053238
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,0,0.008364800363779068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,2,2,128,1,fp8,fp8,0,0.008462399989366532
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,0,0.009926400333642959
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,2,1,128,1,fp8,fp8,0,0.010283199697732925
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,2,2,128,1,fp8,fp8,0,0.008585599809885025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,0,0.010118400305509567
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,0,0.008507200330495835
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,2,1,128,1,fp8,fp8,0,0.008432000130414962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,0,0.01005759984254837
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,0,0.008388800173997879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,2,2,128,1,fp8,fp8,0,0.00854559987783432
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,0,0.008473599702119828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,0,0.008491200208663941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,2,1,128,1,fp8,fp8,0,0.008382400125265121
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,float16,0,0.258787202835083
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,fp8,0,0.2705696105957031
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16384,1,1,128,1,fp8,fp8,0,0.26899359226226804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,float16,0,0.16401439905166626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,fp8,0,0.17006560564041137
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16384,1,1,128,1,fp8,fp8,0,0.16822079420089722
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,float16,0,0.15800000429153443
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,fp8,0,0.15179680585861205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16384,1,1,128,1,fp8,fp8,0,0.15181759595870972
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,float16,0,0.16374720335006715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,fp8,0,0.18041759729385376
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,float16,0,0.12505120038986206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,12288,1,1,128,1,fp8,fp8,0,0.18054399490356446
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,fp8,0,0.12986719608306885
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,float16,0,0.1211616039276123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,12288,1,1,128,1,fp8,fp8,0,0.1311519980430603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,fp8,0,0.11712160110473632
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,12288,1,1,128,1,fp8,fp8,0,0.11697599887847901
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,float16,0,0.12261120080947877
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,fp8,0,0.1394384026527405
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,10240,1,1,128,1,fp8,fp8,0,0.13939520120620727
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,float16,0,0.10486880540847779
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,fp8,0,0.11074240207672119
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,10240,1,1,128,1,fp8,fp8,0,0.11083040237426758
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,float16,0,0.10263680219650269
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,fp8,0,0.10051519870758056
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,10240,1,1,128,1,fp8,fp8,0,0.10060479640960693
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,float16,0,0.14952640533447265
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,fp8,0,0.1789199948310852
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,8192,1,1,128,1,fp8,fp8,0,0.18034080266952515
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,float16,0,0.09241920113563537
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,fp8,0,0.10868320465087891
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,8192,1,1,128,1,fp8,fp8,0,0.10867359638214111
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,fp8,0,0.0903760015964508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,1,1,128,1,fp8,fp8,0,0.09035519957542419
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,float16,0,0.08429440259933471
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,fp8,0,0.08231040239334106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,8192,1,1,128,1,fp8,fp8,0,0.08215360045433044
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,float16,0,0.09641759991645812
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,fp8,0,0.12504639625549316
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,6144,1,1,128,1,fp8,fp8,0,0.12463040351867676
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,float16,0,0.07183200120925903
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,fp8,0,0.08413119912147522
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,0,0.008499199897050858
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,6144,1,1,128,1,fp8,fp8,0,0.08425279855728149
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,float16,0,0.06778560280799865
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,fp8,0,0.07000640034675598
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,6144,1,1,128,1,fp8,fp8,0,0.06985599994659424
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,float16,0,0.06585760116577148
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,fp8,0,0.06376320123672485
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,6144,1,1,128,1,fp8,fp8,0,0.06373760104179382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,float16,0,0.09228960275650025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,fp8,0,0.13266079425811766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,4096,1,1,128,1,fp8,fp8,0,0.1313599944114685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,float16,0,0.05750399827957153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,1,1,128,1,fp8,fp8,0,0.07605119943618774
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,float16,0,0.05134879946708679
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,fp8,0,0.05955039858818054
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,4096,1,1,128,1,fp8,fp8,0,0.05958560109138489
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,float16,0,0.04936319887638092
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,fp8,0,0.05136479735374451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,4096,1,1,128,1,fp8,fp8,0,0.05138720273971557
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,float16,0,0.04730879962444305
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,fp8,0,0.0472463995218277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,4096,1,1,128,1,fp8,fp8,0,0.04720959961414337
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,float16,0,0.06378880143165588
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,fp8,0,0.09463840126991271
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,3072,1,1,128,1,fp8,fp8,0,0.09462720155715942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,float16,0,0.04436480104923248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,fp8,0,0.059654402732849124
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,3072,1,1,128,1,fp8,fp8,0,0.059513598680496216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,float16,0,0.04113759994506836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,fp8,0,0.04729759991168976
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,3072,1,1,128,1,fp8,fp8,0,0.04732480049133301
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,float16,0,0.03923520147800445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,1,1,128,1,fp8,fp8,0,0.041099199652671815
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,float16,0,0.037145599722862244
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,fp8,0,0.03709439933300018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,fp8,0,0.07642239928245545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,float16,0,0.0637935996055603
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,fp8,0,0.10681439638137817
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,2048,1,1,128,1,fp8,fp8,0,0.10703359842300415
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,float16,0,0.03908640146255493
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,fp8,0,0.05968639850616455
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,2048,1,1,128,1,fp8,fp8,0,0.05963199734687805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,float16,0,0.03293440043926239
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,fp8,0,0.04315840005874634
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,2048,1,1,128,1,fp8,fp8,0,0.04322080016136169
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,float16,0,0.030939200520515443
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,fp8,0,0.034964799880981445
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,2048,1,1,128,1,fp8,fp8,0,0.03498719930648804
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,float16,0,0.02885119915008545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,fp8,0,0.030899199843406677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,2048,1,1,128,1,fp8,fp8,0,0.030825600028038025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,float16,0,0.028881600499153136
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,fp8,0,0.04113439917564392
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,fp8,0,0.028859201073646545
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,2048,1,1,128,1,fp8,fp8,0,0.028798401355743408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,float16,0,0.047358399629592894
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,fp8,0,0.08014879822731018
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1536,1,1,128,1,fp8,fp8,0,0.08013120293617249
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,float16,0,0.031007999181747438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,fp8,0,0.04727360010147095
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1536,1,1,128,1,fp8,fp8,0,0.047332799434661864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,float16,0,0.026815998554229736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,fp8,0,0.035043200850486754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1536,1,1,128,1,fp8,fp8,0,0.034971201419830324
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,float16,0,0.024831999838352204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,fp8,0,0.028907200694084166
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1536,1,1,128,1,fp8,fp8,0,0.028880000114440918
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,float16,0,0.024766400456428528
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,fp8,0,0.024817599356174468
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1536,1,1,128,1,fp8,fp8,0,0.024828800559043886
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,float16,0,0.024694399535655977
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,fp8,0,0.024303999543190003
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1536,1,1,128,1,fp8,fp8,0,0.02466080039739609
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,float16,0,0.050944000482559204
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,fp8,0,0.09442880153656005
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1024,1,1,128,1,fp8,fp8,0,0.09436799883842469
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,float16,0,0.028968000411987306
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,fp8,0,0.05146719813346863
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1024,1,1,128,1,fp8,fp8,0,0.053799998760223386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,float16,0,0.02481440007686615
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,fp8,0,0.0350816011428833
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1024,1,1,128,1,fp8,fp8,0,0.03497599959373474
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,float16,0,0.022675199806690215
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,fp8,0,0.0268095999956131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1024,1,1,128,1,fp8,fp8,0,0.0268095999956131
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,float16,0,0.02067359983921051
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,fp8,0,0.022683200240135194
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1024,1,1,128,1,fp8,fp8,0,0.022678400576114654
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,float16,0,0.019944000244140624
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,fp8,0,0.020670400559902193
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1024,1,1,128,1,fp8,fp8,0,0.020729599893093108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,float16,0,0.020377600193023683
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,float16,0,0.08656799793243408
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,fp8,0,0.018673600256443025
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1024,1,1,128,1,fp8,fp8,0,0.018777599930763243
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,1,1,128,1,float16,float16,0,0.04320479929447174
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,1,1,128,1,float16,fp8,0,0.08835520148277283
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,512,1,1,128,1,fp8,fp8,0,0.08818560242652893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,1,1,128,1,float16,float16,0,0.024915200471878052
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,1,1,128,1,float16,fp8,0,0.0489760011434555
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,512,1,1,128,1,fp8,fp8,0,0.049414399266242984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,1,1,128,1,float16,float16,0,0.018705600500106813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,1,1,128,1,float16,fp8,0,0.03107680082321167
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,1,1,128,1,float16,float16,0,0.01666080057621002
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,1,1,128,1,float16,fp8,0,0.022732800245285033
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,512,1,1,128,1,fp8,fp8,0,0.02279199957847595
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,1,1,128,1,float16,float16,0,0.0165120005607605
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,1,1,128,1,float16,fp8,0,0.01863040030002594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,512,1,1,128,1,fp8,fp8,0,0.018617600202560425
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,1,1,128,1,float16,float16,0,0.014579200744628906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,1,1,128,1,float16,fp8,0,0.0165583997964859
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,512,1,1,128,1,fp8,fp8,0,0.01668799966573715
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,1,1,128,1,float16,float16,0,0.014635199308395385
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,1,1,128,1,float16,fp8,0,0.014870400726795196
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,512,1,1,128,1,fp8,fp8,0,0.014665600657463074
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,1,1,128,1,float16,float16,0,0.014679999649524688
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,1,1,128,1,float16,fp8,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,512,1,1,128,1,fp8,fp8,0,0.01459839940071106
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,3072,1,1,128,1,fp8,fp8,0,0.03716799914836884
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,1,1,128,1,float16,float16,0,0.02266079932451248
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,1,1,128,1,fp8,fp8,0,0.0453247994184494
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,256,1,1,128,1,float16,fp8,0,0.04536640048027039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,1,1,128,1,float16,float16,0,0.01671359986066818
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,1,1,128,1,float16,fp8,0,0.028887999057769776
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,256,1,1,128,1,fp8,fp8,0,0.02897599935531616
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,1,1,128,1,float16,fp8,0,0.020667199790477753
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,1,1,128,1,fp8,fp8,0,0.020695999264717102
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,1,1,128,1,float16,float16,0,0.014440000057220459
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,1,1,128,1,float16,fp8,0,0.016659200191497803
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,256,1,1,128,1,fp8,fp8,0,0.016657599806785585
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,1,1,128,1,float16,fp8,0,0.014460800588130951
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,1,1,128,1,float16,float16,0,0.01385439932346344
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,256,1,1,128,1,fp8,fp8,0,0.014510400593280792
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,1,1,128,1,float16,float16,0,0.012742400169372559
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,1,1,128,1,float16,fp8,0,0.012537600100040435
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,256,1,1,128,1,fp8,fp8,0,0.012880000472068786
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,1,1,128,1,float16,fp8,0,0.012827199697494508
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,1,1,128,1,fp8,fp8,0,0.012731200456619263
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,1,1,128,1,float16,float16,0,0.012691199779510498
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,1,1,128,1,float16,fp8,0,0.012615999579429627
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,256,1,1,128,1,fp8,fp8,0,0.012727999687194824
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,1,1,128,1,float16,float16,0,0.017067199945449828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,1,1,128,1,float16,fp8,0,0.027088001370429993
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,128,1,1,128,1,fp8,fp8,0,0.02699680030345917
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,1,1,128,1,float16,float16,0,0.012807999551296235
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,1,1,128,1,float16,fp8,0,0.018769599497318268
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,128,1,1,128,1,fp8,fp8,0,0.01876160055398941
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,256,1,1,128,1,float16,float16,0,0.014507199823856353
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,1,1,128,1,float16,float16,0,0.011275199800729751
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,1,1,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,128,1,1,128,1,fp8,fp8,0,0.014588800072669984
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,1,1,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,1,1,128,1,float16,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,128,1,1,128,1,fp8,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,1,1,128,1,float16,float16,0,0.010654400289058685
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,1,1,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,128,1,1,128,1,fp8,fp8,0,0.010609599947929382
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,1,1,128,1,float16,fp8,0,0.010623999685049058
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,1,1,128,1,fp8,fp8,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,1,1,128,1,float16,float16,0,0.010707200318574906
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,1,1,128,1,float16,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,256,1,1,128,1,float16,float16,0,0.012614400684833526
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,128,1,1,128,1,fp8,fp8,0,0.01061599999666214
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,1,1,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,1,1,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,1,1,128,1,float16,float16,0,0.01266079992055893
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,1,1,128,1,float16,fp8,0,0.018718400597572328
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,64,1,1,128,1,fp8,fp8,0,0.018828800320625304
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,1,1,128,1,float16,float16,0,0.01242400035262108
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,1,1,128,1,float16,fp8,0,0.014497600495815277
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,64,1,1,128,1,fp8,fp8,0,0.014457599818706512
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,1,1,128,1,float16,float16,0,0.010351999849081039
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,1,1,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,64,1,1,128,1,fp8,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,1,1,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,1,1,128,1,float16,fp8,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,64,1,1,128,1,fp8,fp8,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,1,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,1,1,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,512,1,1,128,1,fp8,fp8,0,0.030899199843406677
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,64,1,1,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,1,1,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,1,1,128,1,float16,fp8,0,0.010328000038862228
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,64,1,1,128,1,fp8,fp8,0,0.00939520001411438
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,128,1,1,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,1,1,128,1,float16,fp8,0,0.008369600027799606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,1,1,128,1,float16,float16,0,0.009363199770450591
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,64,1,1,128,1,fp8,fp8,0,0.008398400247097015
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,1,1,128,1,float16,float16,0,0.008459199965000153
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,1,1,128,1,float16,fp8,0,0.008473599702119828
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,64,1,1,128,1,fp8,fp8,0,0.008388800173997879
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,1,1,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,1,1,128,1,float16,fp8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,32,1,1,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,1,1,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,1,1,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,32,1,1,128,1,fp8,fp8,0,0.011727999895811081
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,128,1,1,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,1,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,1,1,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,32,1,1,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,1,1,128,1,float16,fp8,0,0.009507200121879578
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,1,1,128,1,fp8,fp8,0,0.010281600058078766
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,1,1,128,1,float16,float16,0,0.008524800091981888
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,1,1,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,32,1,1,128,1,fp8,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,1,1,128,1,float16,float16,0,0.010255999863147736
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,1,1,128,1,float16,fp8,0,0.008460800349712371
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,32,1,1,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,1,1,128,1,float16,float16,0,0.008367999643087386
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,1,1,128,1,fp8,fp8,0,0.008372800052165985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,1,1,128,1,float16,float16,0,0.00844319984316826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,1,1,128,1,float16,fp8,0,0.008372800052165985
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,32,1,1,128,1,fp8,fp8,0,0.008481600135564805
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,1,1,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,1,1,128,1,float16,fp8,0,0.01244800016283989
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,16,1,1,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,1,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,1,1,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,16,1,1,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,1,1,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,1,1,128,1,float16,fp8,0,0.010339199751615524
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,16,1,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,1,1,128,1,float16,float16,0,0.009097599983215332
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,1,1,128,1,float16,fp8,0,0.008395200222730636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,16,1,1,128,1,fp8,fp8,0,0.008463999629020691
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,1,1,128,1,float16,float16,0,0.008476799726486206
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,1,1,128,1,float16,fp8,0,0.00841120034456253
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,16,1,1,128,1,fp8,fp8,0,0.008417599648237229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,32,1,1,128,1,float16,float16,0,0.009192000329494476
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,1,1,128,1,float16,float16,0,0.010326399654150008
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,1,1,128,1,float16,fp8,0,0.008401600271463394
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,16,1,1,128,1,fp8,fp8,0,0.008483199775218964
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,1,1,128,1,float16,float16,0,0.008393599838018417
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,1,1,128,1,float16,fp8,0,0.008364800363779068
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,16,1,1,128,1,fp8,fp8,0,0.00843840017914772
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,1,1,128,1,float16,float16,0,0.008433599770069123
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,1,1,128,1,float16,fp8,0,0.008404800295829773
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,16,1,1,128,1,fp8,fp8,0,0.008417599648237229
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,32,1,1,128,1,float16,fp8,0,0.010380800068378448
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,128,1,1,1,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,0,0.008452799916267396
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,1,1,128,1,fp8,fp8,0,0.010307200253009796
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,0,0.008423999696969987
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,0,0.009382399916648864
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,0,0.008931200206279754
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,16,1,1,1,128,1,fp8,fp8,0,0.008432000130414962
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,0,0.008395200222730636
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,1,1,128,1,fp8,fp8,0,0.008376000076532364
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,0,0.008924800157546996
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,0,0.008422400057315826
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,4,1,1,1,128,1,fp8,fp8,0,0.008419200032949447
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,0,0.008449599891901017
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,0,0.008355200290679932
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,2,1,1,1,128,1,fp8,fp8,0,0.008345600217580795
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,0,0.008369600027799606
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,0,0.008350399881601333
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,1,1,1,1,128,1,fp8,fp8,0,0.008441600203514098
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,32,1,1,1,128,1,fp8,fp8,0,0.01029760017991066
SGLang,0.5.9,NVIDIA GB300,context_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,0,0.01016639992594719
