framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,1,128,1,fp8,fp8,0,34.200137329101565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,fp8,0,36.561669921875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,fp8,0,34.96964721679687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,2,128,1,fp8,fp8,0,37.14952697753906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,fp8,0,37.436669921875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,4,128,1,fp8,fp8,0,37.80103454589844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,float16,0,58.879327392578126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,float16,0,56.11378784179688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,fp8,0,37.60763244628906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,8,128,1,fp8,fp8,0,35.10385437011719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,fp8,0,17.648701477050782
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,float16,0,56.7423828125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,96,128,1,fp8,fp8,0,19.012725830078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,fp8,0,17.60616455078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,float16,0,28.549160766601563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,1,128,1,fp8,fp8,0,17.745103454589845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,fp8,0,18.05628662109375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,float16,0,28.785110473632812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,2,128,1,fp8,fp8,0,18.01746368408203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,float16,0,29.008267211914063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,float16,0,62.8817138671875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,fp8,0,18.098043823242186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,4,128,1,fp8,fp8,0,17.76708068847656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,fp8,0,18.175738525390624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,float16,0,31.375286865234376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,8,128,1,fp8,fp8,0,18.214495849609374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,fp8,0,9.782892608642578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,96,128,1,fp8,fp8,0,9.348961639404298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,fp8,0,8.989920043945313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,float16,0,15.229083251953124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,1,128,1,fp8,fp8,0,9.00278549194336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,fp8,0,9.055961608886719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,float16,0,15.133334350585937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,2,128,1,fp8,fp8,0,8.94000015258789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,float16,0,15.353131103515626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,fp8,0,8.9163330078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,4,128,1,fp8,fp8,0,9.921196746826173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,float16,0,13.681285095214843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,float16,0,14.894056701660157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,fp8,0,9.763256072998047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,8,128,1,fp8,fp8,0,8.328205108642578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,float16,0,7.462673950195312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,fp8,0,4.952323150634766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,96,128,1,fp8,fp8,0,4.845326232910156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,float16,0,31.4117431640625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,fp8,0,4.680185699462891
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,float16,0,7.0321601867675785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,1,128,1,fp8,fp8,0,4.337958526611328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,fp8,0,4.5856975555419925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,2,128,1,fp8,fp8,0,4.303283309936523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,float16,0,7.295318603515625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,float16,0,6.883602905273437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,4,128,1,fp8,fp8,0,4.439328002929687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,fp8,0,4.724956893920899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,fp8,0,4.278779220581055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,float16,0,6.970460510253906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,8,128,1,fp8,fp8,0,4.4479118347167965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,fp8,0,20.522981262207033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,1,128,1,fp8,fp8,0,22.12777099609375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,float16,0,36.82742919921875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,float16,0,33.858334350585935
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,fp8,0,20.781813049316405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,2,128,1,fp8,fp8,0,22.23771514892578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,fp8,0,20.900267028808592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,4,128,1,fp8,fp8,0,20.78363037109375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,float16,0,36.933737182617186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,fp8,0,21.31293487548828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,float16,0,35.96147155761719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,8,128,1,fp8,fp8,0,20.88169708251953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,float16,0,19.529823303222656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,fp8,0,10.99048309326172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,96,128,1,fp8,fp8,0,11.742638397216798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,fp8,0,10.95733413696289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,1,128,1,fp8,fp8,0,10.547711944580078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,float16,0,17.04667510986328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,2,128,1,fp8,fp8,0,10.509505462646484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,float16,0,18.184555053710938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,fp8,0,11.021147155761719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,float16,0,16.646014404296874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,fp8,0,10.460205078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,4,128,1,fp8,fp8,0,10.936678314208985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,fp8,0,10.240144348144531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,float16,0,19.23572692871094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,float16,0,9.69677734375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,8,128,1,fp8,fp8,0,10.604691314697266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,fp8,0,5.157523345947266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,96,128,1,fp8,fp8,0,5.7213390350341795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,fp8,0,5.0267280578613285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,1,128,1,fp8,fp8,0,4.95654411315918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,float16,0,9.13051986694336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,fp8,0,4.948627090454101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,2,128,1,fp8,fp8,0,5.110420989990234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,float16,0,8.484446716308593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,fp8,0,5.167411041259766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,4,128,1,fp8,fp8,0,5.011697769165039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,float16,0,8.823561859130859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,fp8,0,5.117820739746094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,float16,0,4.619230270385742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,8,128,1,fp8,fp8,0,5.16899528503418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,float16,0,7.8790748596191404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,fp8,0,2.7769472122192385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,96,128,1,fp8,fp8,0,2.85675048828125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,float16,0,3.7882095336914063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,fp8,0,2.52432804107666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,1,128,1,fp8,fp8,0,2.4830080032348634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,fp8,0,2.3378671646118163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,float16,0,3.9373279571533204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,2,128,1,fp8,fp8,0,2.6068496704101562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,fp8,0,2.3202703475952147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,float16,0,3.723366546630859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,4,128,1,fp8,fp8,0,2.530067253112793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,float16,0,4.14256477355957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,fp8,0,2.46569766998291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,8,128,1,fp8,fp8,0,2.5610464096069334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,1,128,1,fp8,fp8,0,15.135972595214843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,fp8,0,15.691047668457031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,float16,0,23.794369506835938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,float16,0,24.164683532714843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,2,128,1,fp8,fp8,0,15.71204071044922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,fp8,0,16.965229797363282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,fp8,0,14.567124938964843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,float16,0,23.567489624023438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,4,128,1,fp8,fp8,0,15.976724243164062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,fp8,0,15.113919067382813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,float16,0,26.944573974609376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,8,128,1,fp8,fp8,0,14.436605834960938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,float16,0,13.954241943359374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,fp8,0,7.816448211669922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,96,128,1,fp8,fp8,0,7.909490966796875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,fp8,0,7.740980529785157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,1,128,1,fp8,fp8,0,7.362529754638672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,float16,0,12.01208953857422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,fp8,0,7.64437255859375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,2,128,1,fp8,fp8,0,7.367774200439453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,float16,0,13.044805908203125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,float16,0,11.920336151123047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,fp8,0,7.829676818847656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,4,128,1,fp8,fp8,0,7.443653106689453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,fp8,0,7.012217712402344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,float16,0,6.3894798278808596
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,8,128,1,fp8,fp8,0,8.133084869384765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,fp8,0,3.6330623626708984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,float16,0,13.361500549316407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,96,128,1,fp8,fp8,0,3.7138225555419924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,float16,0,6.3577934265136715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,1,128,1,fp8,fp8,0,3.2853153228759764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,fp8,0,3.482838439941406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,float16,0,5.903425598144532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,2,128,1,fp8,fp8,0,3.5646209716796875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,fp8,0,3.821992111206055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,float16,0,5.66016960144043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,fp8,0,3.6287200927734373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,4,128,1,fp8,fp8,0,3.2782222747802736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,fp8,0,3.6371936798095703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,float16,0,6.093272018432617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,float16,0,2.4135616302490233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,8,128,1,fp8,fp8,0,3.6452911376953123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,fp8,0,1.9264623641967773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,96,128,1,fp8,fp8,0,2.1673343658447264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,fp8,0,1.7933759689331055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,float16,0,2.8553056716918945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,1,128,1,fp8,fp8,0,1.7007007598876953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,fp8,0,1.7060432434082031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,float16,0,2.6623615264892577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,2,128,1,fp8,fp8,0,1.9496559143066405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,float16,0,1.94420166015625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,fp8,0,1.7087663650512694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,4,128,1,fp8,fp8,0,1.927992057800293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,float16,0,1.938003158569336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,fp8,0,2.080614471435547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,8,128,1,fp8,fp8,0,1.73721923828125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,1,128,1,fp8,fp8,0,19.016891479492188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,fp8,0,21.242864990234374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,float16,0,31.42847900390625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,float16,0,33.44914855957031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,fp8,0,19.397267150878907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,2,128,1,fp8,fp8,0,21.394630432128906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,fp8,0,20.890087890625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,4,128,1,fp8,fp8,0,19.51219940185547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,float16,0,31.57607421875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,fp8,0,21.226190185546876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,float16,0,34.3283935546875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,8,128,1,fp8,fp8,0,19.608277893066408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,float16,0,16.957669067382813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,fp8,0,11.201409912109375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,96,128,1,fp8,fp8,0,11.31090850830078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,fp8,0,9.350516510009765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,float16,0,15.993301391601562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,1,128,1,fp8,fp8,0,10.359185791015625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,fp8,0,9.563180541992187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,2,128,1,fp8,fp8,0,9.403142547607422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,float16,0,17.395356750488283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,fp8,0,9.855831909179688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,float16,0,17.491314697265626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,4,128,1,fp8,fp8,0,9.357733154296875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,fp8,0,9.839379119873048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,float16,0,8.491545867919921
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,float16,0,16.915016174316406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,8,128,1,fp8,fp8,0,10.287500762939453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,fp8,0,5.021080017089844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,96,128,1,fp8,fp8,0,5.173870468139649
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,fp8,0,4.817558288574219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,float16,0,8.35322265625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,1,128,1,fp8,fp8,0,4.478425598144531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,float16,0,8.016336059570312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,2,128,1,fp8,fp8,0,4.3217823028564455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,fp8,0,5.159771347045899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,float16,0,6.951305389404297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,4,128,1,fp8,fp8,0,4.429974365234375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,fp8,0,5.115897750854492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,fp8,0,4.521750259399414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,float16,0,8.165335845947265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,float16,0,3.2676929473876952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,8,128,1,fp8,fp8,0,4.491468811035157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,fp8,0,2.5420368194580076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,96,128,1,fp8,fp8,0,2.7999168395996095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,fp8,0,2.1951776504516602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,1,128,1,fp8,fp8,0,2.2225759506225584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,float16,0,4.181825637817383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,fp8,0,2.1791248321533203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,2,128,1,fp8,fp8,0,2.2463232040405274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,float16,0,3.6110687255859375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,fp8,0,2.220804786682129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,float16,0,4.122625732421875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,4,128,1,fp8,fp8,0,2.3757200241088867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,float16,0,1.4425583839416505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,fp8,0,2.2223167419433594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,float16,0,4.152159881591797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,8,128,1,fp8,fp8,0,2.5922447204589845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,fp8,0,1.4739760398864745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,96,128,1,fp8,fp8,0,1.2938976287841797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,fp8,0,1.1543168067932128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,float16,0,2.0659839630126955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,1,128,1,fp8,fp8,0,1.138155174255371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,float16,0,1.3054143905639648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,fp8,0,1.2044495582580566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,float16,0,1.2842944145202637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,2,128,1,fp8,fp8,0,1.4911215782165528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,fp8,0,1.437777614593506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,4,128,1,fp8,fp8,0,1.2031984329223633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,fp8,0,1.12598876953125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,float16,0,1.2875247955322267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,8,128,1,fp8,fp8,0,1.1692543983459474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,1,128,1,fp8,fp8,0,11.180513763427735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,fp8,0,11.7885986328125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,float16,0,19.59246368408203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,float16,0,17.74117431640625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,fp8,0,11.224992370605468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,2,128,1,fp8,fp8,0,12.506944274902343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,fp8,0,11.290528106689454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,4,128,1,fp8,fp8,0,11.135179138183593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,float16,0,19.40819396972656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,fp8,0,11.230604553222657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,float16,0,20.429484558105468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,8,128,1,fp8,fp8,0,11.279007720947266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,float16,0,10.7928466796875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,fp8,0,6.131192016601562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,96,128,1,fp8,fp8,0,6.302492904663086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,1,128,1,fp8,fp8,0,5.506464004516602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,fp8,0,5.756414413452148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,float16,0,9.016203308105469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,fp8,0,5.517225646972657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,2,128,1,fp8,fp8,0,5.90882568359375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,float16,0,8.939497375488282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,float16,0,9.683793640136718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,fp8,0,5.487225723266602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,4,128,1,fp8,fp8,0,5.953776168823242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,fp8,0,5.5920257568359375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,8,128,1,fp8,fp8,0,5.664700698852539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,float16,0,5.250206375122071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,float16,0,8.942809295654296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,fp8,0,2.96529598236084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,96,128,1,fp8,fp8,0,2.940287971496582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,1,128,1,fp8,fp8,0,2.509516716003418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,fp8,0,2.7640928268432616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,float16,0,4.945489501953125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,float16,0,4.214659118652344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,fp8,0,2.5660816192626954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,2,128,1,fp8,fp8,0,2.549835205078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,float16,0,4.750953674316406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,4,128,1,fp8,fp8,0,2.6066959381103514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,float16,0,3.8670974731445313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,fp8,0,2.863180732727051
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,8,128,1,fp8,fp8,0,2.932636833190918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,float16,0,2.0722383499145507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,fp8,0,2.8734720230102537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,fp8,0,1.4735376358032226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,96,128,1,fp8,fp8,0,1.5819135665893556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,fp8,0,1.2707679748535157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,float16,0,2.058665657043457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,1,128,1,fp8,fp8,0,1.295528030395508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,float16,0,1.5188015937805175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,fp8,0,1.2701071739196776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,2,128,1,fp8,fp8,0,1.4841872215270997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,float16,0,1.507476806640625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,4,128,1,fp8,fp8,0,1.2847887992858886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,fp8,0,1.6892000198364259
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,float16,0,1.451636791229248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,fp8,0,1.598742389678955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,8,128,1,fp8,fp8,0,1.2702447891235351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,float16,0,0.9129887580871582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,fp8,0,0.7972671985626221
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,96,128,1,fp8,fp8,0,0.7965760231018066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,float16,0,0.799457597732544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,fp8,0,0.7418255805969238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,1,128,1,fp8,fp8,0,0.6706543922424316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,float16,0,0.8014752388000488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,fp8,0,0.6829936027526855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,2,128,1,fp8,fp8,0,0.6708079814910889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,fp8,0,0.679795217514038
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,4,128,1,fp8,fp8,0,0.6702896118164062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,float16,0,0.7698463916778564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,fp8,0,0.6793056011199952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,8,128,1,fp8,fp8,0,0.8225071907043457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,float16,0,1.0893872261047364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,fp8,0,10.039315032958985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,1,128,1,fp8,fp8,0,9.823802947998047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,float16,0,18.136199951171875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,float16,0,16.739788818359376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,fp8,0,10.116639709472656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,2,128,1,fp8,fp8,0,10.02825927734375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,fp8,0,11.81240463256836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,4,128,1,fp8,fp8,0,10.705207824707031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,float16,0,18.250302124023438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,float16,0,17.870606994628908
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,fp8,0,11.596419525146484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,8,128,1,fp8,fp8,0,10.462209320068359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,float16,0,10.315932464599609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,fp8,0,5.764734268188477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,96,128,1,fp8,fp8,0,6.3986351013183596
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,float16,0,8.451859283447266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,fp8,0,5.383919906616211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,1,128,1,fp8,fp8,0,4.983720016479492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,2,128,1,fp8,fp8,0,5.605126571655274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,fp8,0,5.173123168945312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,float16,0,9.36005096435547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,float16,0,8.512957000732422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,4,128,1,fp8,fp8,0,4.996558380126953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,fp8,0,5.606460952758789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,fp8,0,5.24157600402832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,float16,0,9.530073547363282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,float16,0,4.767913436889648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,8,128,1,fp8,fp8,0,5.102751922607422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,fp8,0,2.836195182800293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,fp8,0,2.42959041595459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,96,128,1,fp8,fp8,0,2.809984016418457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,float16,0,2.8982255935668944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,1,128,1,fp8,fp8,0,2.511964797973633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,fp8,0,2.486684799194336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,float16,0,3.4118606567382814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,2,128,1,fp8,fp8,0,2.9002880096435546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,float16,0,3.884164810180664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,fp8,0,2.5158687591552735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,4,128,1,fp8,fp8,0,2.408896064758301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,fp8,0,2.4790992736816406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,float16,0,4.381585693359375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,float16,0,1.7001808166503907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,8,128,1,fp8,fp8,0,2.569198417663574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,fp8,0,1.4140975952148438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,96,128,1,fp8,fp8,0,1.729417610168457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,float16,0,1.7792671203613282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,1,128,1,fp8,fp8,0,1.2145392417907714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,float16,0,1.6551664352416993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,2,128,1,fp8,fp8,0,1.212886428833008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,fp8,0,1.8300975799560546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,fp8,0,1.2157135963439942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,4,128,1,fp8,fp8,0,1.2607423782348632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,fp8,0,1.418564796447754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,float16,0,2.1383024215698243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,float16,0,1.3418895721435546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,fp8,0,1.2434224128723144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,float16,0,1.1202624320983887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,96,128,1,fp8,fp8,0,0.7364927768707276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,float16,0,0.9309359550476074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,fp8,0,0.6442351818084717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,1,128,1,fp8,fp8,0,0.8443360328674316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,float16,0,0.7139311790466308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,fp8,0,0.6842576026916504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,2,128,1,fp8,fp8,0,0.8440655708312989
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,float16,0,0.7117631912231446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,8,128,1,fp8,fp8,0,1.2227855682373048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,fp8,0,0.8086319923400879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,fp8,0,0.9306320190429688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,4,128,1,fp8,fp8,0,0.7199967861175537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,float16,0,0.7263728141784668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,fp8,0,0.6403584003448486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,float16,0,0.4382480144500732
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,fp8,0,0.3940704107284546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,8,128,1,fp8,fp8,0,0.6308720111846924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,96,128,1,fp8,fp8,0,0.39466559886932373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,float16,0,0.3811392068862915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,fp8,0,0.34779040813446044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,float16,0,0.3853247880935669
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,fp8,0,0.3459232091903687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,2,128,1,fp8,fp8,0,0.34857280254364015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,float16,0,0.3855583906173706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,4,128,1,fp8,fp8,0,0.3476272106170654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,float16,0,0.38896000385284424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,fp8,0,0.34791519641876223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,8,128,1,fp8,fp8,0,0.34448800086975095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,1,128,1,fp8,fp8,0,0.3413680076599121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,fp8,0,0.3537919998168945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,fp8,0,5.867284774780273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,float16,0,9.12893295288086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,1,128,1,fp8,fp8,0,6.141408157348633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,fp8,0,5.815209579467774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,float16,0,8.518379211425781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,float16,0,10.116563415527343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,2,128,1,fp8,fp8,0,6.4119102478027346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,fp8,0,5.9024913787841795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,4,128,1,fp8,fp8,0,5.881182479858398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,fp8,0,6.325115203857422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,8,128,1,fp8,fp8,0,6.242510223388672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,float16,0,4.9783935546875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,float16,0,10.965898895263672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,fp8,0,3.62703857421875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,96,128,1,fp8,fp8,0,3.5608448028564452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,float16,0,4.536032104492188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,1,128,1,fp8,fp8,0,2.91965274810791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,fp8,0,3.435201644897461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,float16,0,4.610124969482422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,fp8,0,2.986105537414551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,2,128,1,fp8,fp8,0,3.006564712524414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,float16,0,4.790636825561523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,4,128,1,fp8,fp8,0,3.0065776824951174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,fp8,0,3.080780792236328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,float16,0,4.598355102539062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,float16,0,2.293913650512695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,fp8,0,3.259579086303711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,8,128,1,fp8,fp8,0,2.9730831146240235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,fp8,0,2.0349056243896486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,float16,0,1.6576223373413086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,96,128,1,fp8,fp8,0,1.8903423309326173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,fp8,0,1.5945679664611816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,1,128,1,fp8,fp8,0,1.6888015747070313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,fp8,0,1.4656047821044922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,2,128,1,fp8,fp8,0,1.4992112159729003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,float16,0,2.596014404296875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,fp8,0,1.4506367683410644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,4,128,1,fp8,fp8,0,1.4738880157470704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,float16,0,1.8081872940063477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,fp8,0,1.4488320350646973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,8,128,1,fp8,fp8,0,1.4675328254699707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,float16,0,2.5042192459106447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,fp8,0,0.9319808006286621
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,96,128,1,fp8,fp8,0,0.908516788482666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,float16,0,1.2926176071166993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,fp8,0,0.7630127906799317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,1,128,1,fp8,fp8,0,0.7558271884918213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,fp8,0,0.7482048034667969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,2,128,1,fp8,fp8,0,0.7558864116668701
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,float16,0,1.2438048362731933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,float16,0,0.8098352432250977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,fp8,0,0.7543200016021728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,float16,0,1.4656384468078614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,4,128,1,fp8,fp8,0,1.007919979095459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,float16,0,0.9537712097167969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,fp8,0,0.7563615798950195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,float16,0,0.5388591766357422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,8,128,1,fp8,fp8,0,0.7525023937225341
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,96,128,1,fp8,fp8,0,0.47295198440551756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,float16,0,0.4507567882537842
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,fp8,0,0.6492767810821534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,1,128,1,fp8,fp8,0,0.3994960069656372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,float16,0,0.4968416213989258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,2,128,1,fp8,fp8,0,0.3997391939163208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,float16,0,0.5364607810974121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,fp8,0,0.398852801322937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,4,128,1,fp8,fp8,0,0.5426095962524414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,float16,0,0.4406015872955322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,fp8,0,0.41472320556640624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,8,128,1,fp8,fp8,0,0.3982367992401123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,float16,0,0.278054404258728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,fp8,0,0.2563904047012329
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,96,128,1,fp8,fp8,0,0.25477759838104247
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,float16,0,0.24051198959350586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,fp8,0,0.21555039882659913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,1,128,1,fp8,fp8,0,0.2174191951751709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,fp8,0,0.4002511978149414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,float16,0,0.2377232074737549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,fp8,0,0.21767840385437012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,2,128,1,fp8,fp8,0,0.218286395072937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,float16,0,0.23839519023895264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,fp8,0,0.4223184108734131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,fp8,0,0.218556809425354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,4,128,1,fp8,fp8,0,0.22088639736175536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,float16,0,0.24046719074249268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,fp8,0,0.2157599925994873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,8,128,1,fp8,fp8,0,0.21869120597839356
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,1,128,1,fp8,fp8,0,5.794393539428711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,fp8,0,6.087166213989258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,float16,0,8.764093017578125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,float16,0,9.606982421875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,fp8,0,5.823519897460938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,2,128,1,fp8,fp8,0,6.013383865356445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,fp8,0,6.1193183898925785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,float16,0,8.952467346191407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,4,128,1,fp8,fp8,0,5.84249267578125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,fp8,0,6.369543838500976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,float16,0,10.349215698242187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,float16,0,5.535881423950196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,8,128,1,fp8,fp8,0,6.353055953979492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,fp8,0,3.9314239501953123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,96,128,1,fp8,fp8,0,3.7299022674560547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,float16,0,4.451358413696289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,fp8,0,3.0308015823364256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,1,128,1,fp8,fp8,0,2.950214385986328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,fp8,0,2.9662687301635744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,float16,0,4.514529418945313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,2,128,1,fp8,fp8,0,3.1539920806884765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,fp8,0,2.980587196350098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,float16,0,4.259123229980469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,4,128,1,fp8,fp8,0,2.9431312561035154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,float16,0,3.7084800720214846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,fp8,0,2.940166473388672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,float16,0,2.0771295547485353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,8,128,1,fp8,fp8,0,2.936177635192871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,fp8,0,2.3926319122314452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,96,128,1,fp8,fp8,0,1.8784528732299806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,fp8,0,1.4774288177490233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,float16,0,2.0234352111816407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,1,128,1,fp8,fp8,0,1.50457763671875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,fp8,0,1.4669296264648437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,float16,0,1.7367136001586914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,2,128,1,fp8,fp8,0,1.6613040924072267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,float16,0,1.5933872222900392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,fp8,0,1.4648688316345215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,4,128,1,fp8,fp8,0,1.7471952438354492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,float16,0,1.5953503608703614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,8,128,1,fp8,fp8,0,1.4635680198669434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,fp8,0,1.7406576156616211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,float16,0,1.0430944442749024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,fp8,0,1.3659695625305175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,float16,0,0.8238351821899415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,96,128,1,fp8,fp8,0,0.949396800994873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,fp8,0,0.9827679634094239
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,1,128,1,fp8,fp8,0,0.7962399959564209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,fp8,0,0.7488639831542969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,2,128,1,fp8,fp8,0,0.750600004196167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,fp8,0,0.7487455844879151
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,float16,0,0.8099984169006348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,4,128,1,fp8,fp8,0,0.750764799118042
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,float16,0,0.8054896354675293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,fp8,0,0.8197680473327636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,8,128,1,fp8,fp8,0,0.7500991821289062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,float16,0,0.5570271968841553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,fp8,0,0.49592318534851076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,96,128,1,fp8,fp8,0,0.4919568061828613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,fp8,0,0.39495038986206055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,float16,0,0.4806431770324707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,1,128,1,fp8,fp8,0,0.3920144081115723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,float16,0,0.4244847774505615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,fp8,0,0.4693103790283203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,float16,0,0.815447998046875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,2,128,1,fp8,fp8,0,0.39087519645690916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,float16,0,0.4271071910858154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,fp8,0,0.420684814453125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,4,128,1,fp8,fp8,0,0.39783520698547364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,float16,0,0.4275199890136719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,fp8,0,0.3968928098678589
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,8,128,1,fp8,fp8,0,0.3896575927734375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,fp8,0,0.2609472036361694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,96,128,1,fp8,fp8,0,0.2625472068786621
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,fp8,0,0.21084959506988527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,1,128,1,fp8,fp8,0,0.21207039356231688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,float16,0,0.22578558921813965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,fp8,0,0.2124016046524048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,2,128,1,fp8,fp8,0,0.21041440963745117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,float16,0,0.2303312063217163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,fp8,0,0.21040959358215333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,4,128,1,fp8,fp8,0,0.21313600540161132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,float16,0,0.22899839878082276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,fp8,0,0.21227679252624512
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,8,128,1,fp8,fp8,0,0.21074240207672118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,float16,0,0.16075359582901
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,fp8,0,0.14503040313720703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,96,128,1,fp8,fp8,0,0.14572800397872926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,float16,0,0.12497919797897339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,fp8,0,0.11744639873504639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,1,128,1,fp8,fp8,0,0.11708159446716308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,float16,0,0.12703360319137574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,fp8,0,0.11673280000686645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,2,128,1,fp8,fp8,0,0.11765120029449463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,float16,0,0.12607359886169434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,fp8,0,0.1175279974937439
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,float16,0,0.29116799831390383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,4,128,1,fp8,fp8,0,0.11632000207901001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,float16,0,0.13001760244369506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,fp8,0,0.11952639818191528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,8,128,1,fp8,fp8,0,0.11686240434646607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,float16,0,0.22530078887939453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,fp8,0,3.680400085449219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,1,128,1,fp8,fp8,0,3.6805248260498047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,float16,0,4.900680160522461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,float16,0,4.997859191894531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,2,128,1,fp8,fp8,0,3.6831985473632813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,fp8,0,3.8662174224853514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,fp8,0,3.647166442871094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,float16,0,5.239206314086914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,4,128,1,fp8,fp8,0,3.7199119567871093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,float16,0,4.523231887817383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,fp8,0,3.648980712890625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,8,128,1,fp8,fp8,0,3.682324981689453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,fp8,0,2.442571258544922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,float16,0,3.1083663940429687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,96,128,1,fp8,fp8,0,2.4488784790039064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,float16,0,2.0519567489624024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,fp8,0,2.008380889892578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,1,128,1,fp8,fp8,0,1.8632816314697265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,float16,0,2.185873603820801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,fp8,0,2.0486000061035154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,2,128,1,fp8,fp8,0,1.8525535583496093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,float16,0,2.0467519760131836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,4,128,1,fp8,fp8,0,1.8382911682128906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,fp8,0,2.0664640426635743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,float16,0,1.9926544189453126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,fp8,0,2.357004737854004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,float16,0,1.3459296226501465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,8,128,1,fp8,fp8,0,1.83703670501709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,fp8,0,1.6298559188842774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,fp8,0,0.9366016387939453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,float16,0,1.1321599960327149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,96,128,1,fp8,fp8,0,1.242679977416992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,1,128,1,fp8,fp8,0,1.0035152435302734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,float16,0,0.9650208473205566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,2,128,1,fp8,fp8,0,0.9464287757873535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,fp8,0,1.1004624366760254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,float16,0,1.000984001159668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,fp8,0,1.0549471855163575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,4,128,1,fp8,fp8,0,0.9431952476501465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,float16,0,0.9987135887145996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,fp8,0,1.2232224464416503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,float16,0,0.694159984588623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,8,128,1,fp8,fp8,0,0.9317119598388672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,fp8,0,0.6345680236816407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,float16,0,0.5113711833953858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,96,128,1,fp8,fp8,0,0.6921328067779541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,1,128,1,fp8,fp8,0,0.6977359771728515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,float16,0,0.5125472068786621
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,fp8,0,0.4998608112335205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,2,128,1,fp8,fp8,0,0.48702077865600585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,float16,0,0.5139167785644532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,fp8,0,0.4858799934387207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,4,128,1,fp8,fp8,0,0.48020000457763673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,float16,0,0.5377647876739502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,fp8,0,0.483841609954834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,8,128,1,fp8,fp8,0,0.47980642318725586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,float16,0,0.36723999977111815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,fp8,0,0.33255040645599365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,96,128,1,fp8,fp8,0,0.3297760009765625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,float16,0,0.27744479179382325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,fp8,0,0.25867040157318116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,1,128,1,fp8,fp8,0,0.25346078872680666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,float16,0,0.2713903903961182
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,fp8,0,0.25863521099090575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,2,128,1,fp8,fp8,0,0.25373599529266355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,float16,0,0.27317759990692136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,fp8,0,0.2579087972640991
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,float16,0,0.27774720191955565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,8,128,1,fp8,fp8,0,0.25505919456481935
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,float16,0,0.19357600212097167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,fp8,0,0.1787536025047302
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,96,128,1,fp8,fp8,0,0.17891680002212523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,float16,0,0.14899519681930543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,fp8,0,0.13915200233459474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,1,128,1,fp8,fp8,0,0.13929599523544312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,float16,0,0.14881759881973267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,fp8,0,0.13962719440460206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,2,128,1,fp8,fp8,0,0.13923039436340331
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,float16,0,0.14944640398025513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,4,128,1,fp8,fp8,0,0.25414559841156004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,fp8,0,0.14001599550247193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,4,128,1,fp8,fp8,0,0.13825279474258423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,fp8,0,0.25800960063934325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,float16,0,0.1513375997543335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,fp8,0,0.13809280395507811
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,8,128,1,fp8,fp8,0,0.14084160327911377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,float16,0,0.10730719566345215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,96,128,1,fp8,fp8,0,0.1018496036529541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,float16,0,0.08392959833145142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,fp8,0,0.08022400140762329
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,1,128,1,fp8,fp8,0,0.07989280223846436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,float16,0,0.08480160236358643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,fp8,0,0.07987200021743775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,2,128,1,fp8,fp8,0,0.08013120293617249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,float16,0,0.08455039858818054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,fp8,0,0.08098559975624084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,4,128,1,fp8,fp8,0,0.07938399910926819
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,fp8,0,0.0810800015926361
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,8,128,1,fp8,fp8,0,0.0800704002380371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,fp8,0,0.48095998764038084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,fp8,0,0.09987199902534485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,float16,0,0.08768320083618164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,float16,0,4.966985702514648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,fp8,0,3.953841781616211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,1,128,1,fp8,fp8,0,3.9555438995361327
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,float16,0,5.008670425415039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,fp8,0,3.988991928100586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,2,128,1,fp8,fp8,0,3.949740982055664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,float16,0,5.616524887084961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,fp8,0,3.976968002319336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,4,128,1,fp8,fp8,0,3.9885936737060548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,float16,0,5.257822418212891
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,fp8,0,3.9721630096435545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,8,128,1,fp8,fp8,0,3.9839649200439453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,float16,0,3.425032043457031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,float16,0,2.0532863616943358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,fp8,0,2.79738712310791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,96,128,1,fp8,fp8,0,2.7805999755859374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,fp8,0,2.0790719985961914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,1,128,1,fp8,fp8,0,2.147478485107422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,float16,0,2.0386320114135743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,fp8,0,1.988920021057129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,2,128,1,fp8,fp8,0,1.9891328811645508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,float16,0,2.202102470397949
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,4,128,1,fp8,fp8,0,1.9877792358398438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,fp8,0,2.603144073486328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,float16,0,2.2437023162841796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,float16,0,1.5461503982543945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,8,128,1,fp8,fp8,0,1.9837152481079101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,fp8,0,1.6053888320922851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,float16,0,1.0221455574035645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,fp8,0,1.0090895652770997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,96,128,1,fp8,fp8,0,1.6175359725952148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,1,128,1,fp8,fp8,0,1.1002767562866211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,float16,0,1.0149552345275878
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,fp8,0,1.011905574798584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,2,128,1,fp8,fp8,0,1.0096223831176758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,float16,0,1.0232864379882813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,fp8,0,1.0094688415527344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,4,128,1,fp8,fp8,0,1.0078672409057616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,float16,0,1.248076820373535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,fp8,0,1.007089614868164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,8,128,1,fp8,fp8,0,1.0047455787658692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,fp8,0,0.7172848224639893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,float16,0,0.9759087562561035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,float16,0,0.5372528076171875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,96,128,1,fp8,fp8,0,0.7147952079772949
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,fp8,0,0.5398640155792236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,fp8,0,2.238609504699707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,1,128,1,fp8,fp8,0,0.5198944091796875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,fp8,0,0.515172815322876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,2,128,1,fp8,fp8,0,0.5215104103088379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,float16,0,0.5419680118560791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,4,128,1,fp8,fp8,0,0.5203104019165039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,float16,0,0.5526159763336181
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,fp8,0,0.5192863941192627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,8,128,1,fp8,fp8,0,0.5166255950927734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,float16,0,0.40154080390930175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,fp8,0,0.37181758880615234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,96,128,1,fp8,fp8,0,0.3698080062866211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,float16,0,0.28299999237060547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,fp8,0,0.27123360633850097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,1,128,1,fp8,fp8,0,0.27226080894470217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,float16,0,0.5272928237915039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,fp8,0,0.2713279962539673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,2,128,1,fp8,fp8,0,0.26756958961486815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,float16,0,0.2844543933868408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,fp8,0,0.2673680067062378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,4,128,1,fp8,fp8,0,0.2713248014450073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,float16,0,0.2841504096984863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,fp8,0,0.26948161125183107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,8,128,1,fp8,fp8,0,0.26660799980163574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,float16,0,0.2141808032989502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,96,128,1,fp8,fp8,0,0.19713120460510253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,float16,0,0.152510404586792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,fp8,0,0.1462448000907898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,1,128,1,fp8,fp8,0,0.1463711977005005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,float16,0,0.15336159467697144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,float16,0,0.2805392026901245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,fp8,0,0.1466063976287842
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,2,128,1,fp8,fp8,0,0.14664160013198851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,float16,0,0.15437439680099488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,fp8,0,0.1461967945098877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,4,128,1,fp8,fp8,0,0.1463312029838562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,float16,0,0.155622398853302
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,fp8,0,0.1462272047996521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,8,128,1,fp8,fp8,0,0.1452623963356018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,float16,0,0.1192896008491516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,fp8,0,0.1090224027633667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,96,128,1,fp8,fp8,0,0.10820000171661377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,float16,0,0.08443679809570312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,fp8,0,0.08063200116157532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,1,128,1,fp8,fp8,0,0.08065279722213745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,fp8,0,0.19520000219345093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,float16,0,0.08481600284576415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,fp8,0,0.08036640286445618
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,2,128,1,fp8,fp8,0,0.07990080118179321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,float16,0,0.0856607973575592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,fp8,0,0.08055359721183777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,4,128,1,fp8,fp8,0,0.08016639947891235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,float16,0,0.08785439729690551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,fp8,0,0.08072320222854615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,8,128,1,fp8,fp8,0,0.07988319993019104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,float16,0,0.06850240230560303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,fp8,0,0.06194239854812622
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,96,128,1,fp8,fp8,0,0.061526399850845334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,float16,0,0.05163360238075256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,fp8,0,0.050065600872039796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,1,128,1,fp8,fp8,0,0.049353599548339844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,float16,0,0.05151039958000183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,fp8,0,0.050128000974655154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,2,128,1,fp8,fp8,0,0.04999519884586334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,float16,0,0.051851201057434085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,fp8,0,0.05022720098495483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,4,128,1,fp8,fp8,0,0.04992479979991913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,float16,0,0.052425599098205565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,fp8,0,0.04998719990253449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,8,128,1,fp8,fp8,0,0.050032001733779904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,fp8,0,0.5130047798156738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,1,128,1,float16,float16,0,3.2818977355957033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,1,128,1,float16,fp8,0,3.0604719161987304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,1,128,1,fp8,fp8,0,3.044798469543457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,2,128,1,float16,fp8,0,3.0522415161132814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,2,128,1,fp8,fp8,0,3.0382144927978514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,4,128,1,float16,float16,0,3.5462879180908202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,4,128,1,float16,fp8,0,3.0486896514892576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,2,128,1,float16,float16,0,3.1428287506103514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,4,128,1,fp8,fp8,0,3.036014366149902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,8,128,1,float16,float16,0,3.667878341674805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,96,128,1,float16,float16,0,2.572928047180176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,8,128,1,fp8,fp8,0,3.0590255737304686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,1,128,1,float16,float16,0,1.5121824264526367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,96,128,1,float16,fp8,0,2.3532575607299804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,96,128,1,fp8,fp8,0,2.3346431732177733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,8,128,1,float16,fp8,0,3.043212890625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,1,128,1,float16,fp8,0,1.5645952224731445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,1,128,1,fp8,fp8,0,1.535591983795166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,2,128,1,float16,float16,0,1.525857639312744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,2,128,1,float16,fp8,0,1.5370143890380858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,2,128,1,fp8,fp8,0,1.5444016456604004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,4,128,1,float16,float16,0,1.5202768325805665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,4,128,1,float16,fp8,0,1.6660543441772462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,4,128,1,fp8,fp8,0,1.5354864120483398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,8,128,1,float16,float16,0,1.5998784065246583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,8,128,1,float16,fp8,0,1.52400484085083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,8,128,1,fp8,fp8,0,1.5243616104125977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,96,128,1,float16,float16,0,1.2564224243164062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,96,128,1,fp8,fp8,0,1.1644656181335449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,96,128,1,float16,fp8,0,1.2454480171203612
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,1,128,1,float16,float16,0,0.7593616008758545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,1,128,1,float16,fp8,0,0.9183055877685546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,1,128,1,fp8,fp8,0,0.7775856018066406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,2,128,1,float16,fp8,0,0.7741119861602783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,2,128,1,fp8,fp8,0,0.7787024021148682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,4,128,1,float16,float16,0,0.7701119899749755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,4,128,1,float16,fp8,0,0.7733967781066895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,4,128,1,fp8,fp8,0,0.7772111892700195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,8,128,1,float16,float16,0,0.7875247955322265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,8,128,1,float16,fp8,0,0.7717616081237793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,96,128,1,float16,fp8,0,0.592307186126709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,8,128,1,fp8,fp8,0,0.7745327949523926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,1,128,1,float16,float16,0,0.40039520263671874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,96,128,1,fp8,fp8,0,0.592091178894043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,2,128,1,float16,float16,0,0.7688000202178955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,1,128,1,float16,fp8,0,0.3999311923980713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,1,128,1,fp8,fp8,0,0.3969167947769165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,2,128,1,float16,float16,0,0.39825439453125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,2,128,1,float16,fp8,0,0.3988480091094971
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,2,128,1,fp8,fp8,0,0.3959984064102173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,4,128,1,float16,float16,0,0.4024223804473877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,4,128,1,float16,fp8,0,0.3987839937210083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,4,128,1,fp8,fp8,0,0.39555680751800537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,8,128,1,float16,float16,0,0.41297597885131837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,8,128,1,float16,fp8,0,0.3971280097961426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,8,128,1,fp8,fp8,0,0.3942768096923828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,96,128,1,float16,float16,0,0.330350399017334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,96,128,1,float16,fp8,0,0.30796959400177004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,1,128,1,float16,float16,0,0.20968000888824462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,96,128,1,float16,float16,0,0.7077407836914062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,1,128,1,float16,fp8,0,0.2096463918685913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,1,128,1,fp8,fp8,0,0.20938720703125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,2,128,1,float16,float16,0,0.2073744058609009
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,2,128,1,fp8,fp8,0,0.2072767972946167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,4,128,1,float16,float16,0,0.21048319339752197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,4,128,1,float16,fp8,0,0.2082223892211914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,4,128,1,fp8,fp8,0,0.20629279613494872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,8,128,1,float16,float16,0,0.21678240299224855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,8,128,1,float16,fp8,0,0.20573599338531495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,8,128,1,fp8,fp8,0,0.20748000144958495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,96,128,1,float16,float16,0,0.17378560304641724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,96,128,1,float16,fp8,0,0.16254400014877318
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,96,128,1,fp8,fp8,0,0.1605232000350952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,1,128,1,float16,float16,0,0.11349760293960572
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,1,128,1,float16,fp8,0,0.1110368013381958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,1,128,1,fp8,fp8,0,0.11232960224151611
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,2,128,1,float16,float16,0,0.1124127984046936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,2,128,1,float16,fp8,0,0.11267039775848389
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,2,128,1,fp8,fp8,0,0.11081440448760986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,4,128,1,float16,float16,0,0.11518080234527588
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,4,128,1,float16,fp8,0,0.11095999479293824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,4,128,1,fp8,fp8,0,0.11288000345230102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,8,128,1,float16,float16,0,0.11593600511550903
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,8,128,1,float16,fp8,0,0.1125599980354309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,8,128,1,fp8,fp8,0,0.1110111951828003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,96,128,1,float16,float16,0,0.09669600129127502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,96,128,1,float16,fp8,0,0.08835999965667725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,96,128,1,fp8,fp8,0,0.08839679956436157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,1,128,1,float16,float16,0,0.06219999790191651
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,1,128,1,fp8,fp8,0,0.06172159910202026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,96,128,1,fp8,fp8,0,0.30530719757080077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,2,128,1,float16,float16,0,0.0618511974811554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,2,128,1,float16,fp8,0,0.06164479851722717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,2,128,1,fp8,fp8,0,0.06161919832229614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,4,128,1,float16,float16,0,0.06293759942054748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,4,128,1,float16,fp8,0,0.06121600270271301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,4,128,1,fp8,fp8,0,0.06167200207710266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,8,128,1,float16,float16,0,0.06430240273475647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,8,128,1,float16,fp8,0,0.06142560243606567
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,8,128,1,fp8,fp8,0,0.061712002754211424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,96,128,1,float16,float16,0,0.055508798360824584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,96,128,1,float16,fp8,0,0.05005120038986206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,96,128,1,fp8,fp8,0,0.0500927984714508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,1,128,1,float16,float16,0,0.038464000821113585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,1,128,1,float16,fp8,0,0.03724640011787415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,1,128,1,fp8,fp8,0,0.03711360096931458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,2,128,1,float16,float16,0,0.038571199774742125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,2,128,1,float16,fp8,0,0.037190398573875426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,4,128,1,float16,float16,0,0.039110401272773744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,2,128,1,fp8,fp8,0,0.037441599369049075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,4,128,1,float16,fp8,0,0.037257599830627444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,4,128,1,fp8,fp8,0,0.038020798563957216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,8,128,1,float16,float16,0,0.03920960128307342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,2,128,1,float16,fp8,0,0.20954079627990724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,8,128,1,fp8,fp8,0,0.03794240057468414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,96,128,1,float16,fp8,0,0.03287360072135925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,96,128,1,fp8,fp8,0,0.03294079899787903
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,1,128,1,float16,float16,0,0.026862400770187377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,1,128,1,float16,fp8,0,0.026767998933792114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,1,128,1,fp8,fp8,0,0.02675839960575104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,2,128,1,float16,float16,0,0.026830399036407472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,2,128,1,float16,fp8,0,0.026796799898147584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,2,128,1,fp8,fp8,0,0.026862400770187377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,4,128,1,float16,float16,0,0.02683520019054413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,4,128,1,float16,fp8,0,0.02683840095996857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,4,128,1,fp8,fp8,0,0.026820799708366393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,8,128,1,float16,float16,0,0.02690559923648834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,8,128,1,float16,fp8,0,0.0267984002828598
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,8,128,1,fp8,fp8,0,0.026771199703216553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,1,128,1,float16,fp8,0,0.06174719929695129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,8,128,1,float16,fp8,0,0.0373663991689682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,1,128,1,float16,float16,0,1.225553607940674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,96,128,1,float16,float16,0,0.03299359977245331
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,2,128,1,float16,float16,0,1.2233440399169921
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,1,128,1,fp8,fp8,0,1.287558364868164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,2,128,1,float16,fp8,0,1.2894880294799804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,2,128,1,fp8,fp8,0,1.2952192306518555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,4,128,1,float16,fp8,0,1.286524772644043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,4,128,1,fp8,fp8,0,1.2829327583312988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,8,128,1,float16,float16,0,1.2818096160888672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,1,128,1,float16,fp8,0,1.2992752075195313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,8,128,1,float16,fp8,0,1.2814288139343262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,8,128,1,fp8,fp8,0,1.4508480072021483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,96,128,1,float16,float16,0,1.1012672424316405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,4,128,1,float16,float16,0,1.2403856277465821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,96,128,1,float16,fp8,0,1.0377936363220215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,1,128,1,float16,float16,0,0.6274943828582764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,1,128,1,float16,fp8,0,0.6509391784667968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,96,128,1,fp8,fp8,0,1.0410880088806151
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,1,128,1,fp8,fp8,0,0.6505248069763183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,2,128,1,float16,float16,0,0.6272975921630859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,2,128,1,float16,fp8,0,0.6497551918029785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,2,128,1,fp8,fp8,0,0.6507520198822021
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,4,128,1,float16,float16,0,0.628984022140503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,4,128,1,float16,fp8,0,0.654420804977417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,4,128,1,fp8,fp8,0,0.6490992069244385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,8,128,1,float16,float16,0,0.6521312236785889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,8,128,1,float16,fp8,0,0.6483583927154541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,96,128,1,float16,float16,0,0.5582640171051025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,8,128,1,fp8,fp8,0,0.6542255878448486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,96,128,1,float16,fp8,0,0.5291344165802002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,1,128,1,float16,float16,0,0.31969280242919923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,1,128,1,float16,fp8,0,0.3366031885147095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,1,128,1,fp8,fp8,0,0.33506720066070556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,2,128,1,float16,float16,0,0.3188512086868286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,2,128,1,float16,fp8,0,0.33529438972473147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,2,128,1,fp8,fp8,0,0.3346944093704224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,4,128,1,float16,float16,0,0.3228080034255981
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,4,128,1,float16,fp8,0,0.33578240871429443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,4,128,1,fp8,fp8,0,0.33396799564361573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,8,128,1,float16,float16,0,0.3327552080154419
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,8,128,1,float16,fp8,0,0.3349616050720215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,8,128,1,fp8,fp8,0,0.333459210395813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,96,128,1,float16,float16,0,0.28886559009552004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,96,128,1,float16,fp8,0,0.2736448049545288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,1,128,1,float16,float16,0,0.1700096011161804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,96,128,1,fp8,fp8,0,0.2751568078994751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,1,128,1,fp8,fp8,0,0.17382559776306153
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,2,128,1,float16,float16,0,0.173307204246521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,2,128,1,float16,fp8,0,0.174180805683136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,2,128,1,fp8,fp8,0,0.17648639678955078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,4,128,1,float16,float16,0,0.1717072010040283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,4,128,1,float16,fp8,0,0.17632319927215576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,4,128,1,fp8,fp8,0,0.17319200038909913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,8,128,1,float16,float16,0,0.18033920526504515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,8,128,1,float16,fp8,0,0.17251039743423463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,96,128,1,fp8,fp8,0,0.526859188079834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,8,128,1,fp8,fp8,0,0.17624800205230712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,96,128,1,float16,float16,0,0.15483360290527343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,96,128,1,float16,fp8,0,0.14482239484786988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,96,128,1,fp8,fp8,0,0.14603999853134156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,1,128,1,float16,float16,0,0.0932703971862793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,1,128,1,float16,fp8,0,0.09666240215301514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,1,128,1,fp8,fp8,0,0.09478719830513001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,2,128,1,float16,float16,0,0.09575520157814026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,2,128,1,float16,fp8,0,0.09452639818191529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,2,128,1,fp8,fp8,0,0.0966816008090973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,4,128,1,float16,float16,0,0.09488000273704529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,4,128,1,float16,fp8,0,0.09656800031661987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,4,128,1,fp8,fp8,0,0.09499359726905823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,8,128,1,float16,float16,0,0.09903519749641418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,8,128,1,fp8,fp8,0,0.09688320159912109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,96,128,1,float16,float16,0,0.08889120221138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,96,128,1,float16,fp8,0,0.08216800093650818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,96,128,1,fp8,fp8,0,0.08220959901809692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,1,128,1,float16,float16,0,0.05438240170478821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,1,128,1,float16,fp8,0,0.055561602115631104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,1,128,1,fp8,fp8,0,0.05562559962272644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,2,128,1,float16,float16,0,0.055638402700424194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,2,128,1,float16,fp8,0,0.05562400221824646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,2,128,1,fp8,fp8,0,0.05543680191040039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,4,128,1,float16,float16,0,0.0557807981967926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,4,128,1,float16,fp8,0,0.05560960173606873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,4,128,1,fp8,fp8,0,0.055606400966644286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,8,128,1,float16,float16,0,0.057766401767730714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,8,128,1,float16,fp8,0,0.05562719702720642
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,8,128,1,float16,fp8,0,0.0946175992488861
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,8,128,1,fp8,fp8,0,0.05565919876098633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,96,128,1,float16,float16,0,0.04731839895248413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,96,128,1,float16,fp8,0,0.04534879922866821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,96,128,1,fp8,fp8,0,0.0435696005821228
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,1,128,1,float16,float16,0,0.031092798709869383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,1,128,1,float16,fp8,0,0.03096640110015869
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,1,128,1,fp8,fp8,0,0.03102880120277405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,2,128,1,float16,float16,0,0.030961599946022034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,2,128,1,float16,fp8,0,0.0321615993976593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,4,128,1,float16,float16,0,0.03102400004863739
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,4,128,1,float16,fp8,0,0.03127039968967438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,4,128,1,fp8,fp8,0,0.03105120062828064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,8,128,1,float16,float16,0,0.03275200128555298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,8,128,1,float16,fp8,0,0.031147199869155883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,8,128,1,fp8,fp8,0,0.031385600566864014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,96,128,1,float16,float16,0,0.0272271990776062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,96,128,1,fp8,fp8,0,0.028958401083946227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,96,128,1,float16,fp8,0,0.029175999760627746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,1,128,1,float16,float16,0,0.022881600260734557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,1,128,1,float16,fp8,0,0.023044799268245698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,1,128,1,fp8,fp8,0,0.02468799948692322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,2,128,1,float16,float16,0,0.023057599365711213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,2,128,1,fp8,fp8,0,0.023185600340366364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,4,128,1,float16,float16,0,0.02280000001192093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,4,128,1,float16,fp8,0,0.022862400114536285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,4,128,1,fp8,fp8,0,0.022737599909305573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,8,128,1,float16,float16,0,0.0229312002658844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,8,128,1,float16,fp8,0,0.022841599583625794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,8,128,1,fp8,fp8,0,0.02281759977340698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,96,128,1,float16,float16,0,0.018641600012779237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,1,128,1,float16,fp8,0,0.1770959973335266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,96,128,1,float16,fp8,0,0.01865759938955307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,1,128,1,float16,float16,0,0.01663679927587509
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,2,128,1,fp8,fp8,0,0.030905601382255555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,1,128,1,float16,fp8,0,0.016681599617004394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,1,128,1,fp8,fp8,0,0.01661760061979294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,2,128,1,float16,float16,0,0.01658399999141693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,2,128,1,float16,fp8,0,0.016638399660587312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,2,128,1,fp8,fp8,0,0.01661919951438904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,4,128,1,float16,float16,0,0.016616000235080718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,4,128,1,float16,fp8,0,0.016624000668525696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,4,128,1,fp8,fp8,0,0.016606399416923524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,8,128,1,float16,float16,0,0.016596800088882445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,8,128,1,float16,fp8,0,0.016625599563121797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,8,128,1,fp8,fp8,0,0.016545599699020384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,1,128,1,float16,float16,0,0.7475615978240967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,2,128,1,float16,fp8,0,0.024676799774169922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,1,128,1,fp8,fp8,0,0.7778255939483643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,2,128,1,float16,float16,0,0.7506063938140869
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,96,128,1,fp8,fp8,0,0.0188400000333786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,2,128,1,float16,fp8,0,0.7763967990875245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,2,128,1,fp8,fp8,0,0.779643201828003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,4,128,1,float16,fp8,0,0.775051212310791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,4,128,1,fp8,fp8,0,0.7786592006683349
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,1,128,1,float16,fp8,0,0.7779295921325684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,8,128,1,float16,float16,0,0.7726319789886474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,8,128,1,float16,fp8,0,0.7771823883056641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,96,128,1,float16,float16,0,0.6104911804199219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,8,128,1,fp8,fp8,0,0.774294376373291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,1,128,1,float16,float16,0,0.38079519271850587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,96,128,1,fp8,fp8,0,0.586520004272461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,1,128,1,float16,fp8,0,0.394648003578186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,1,128,1,fp8,fp8,0,0.3950191974639893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,2,128,1,float16,float16,0,0.3803231954574585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,2,128,1,float16,fp8,0,0.3941967964172363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,4,128,1,float16,float16,0,0.7630191802978515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,2,128,1,fp8,fp8,0,0.39467999935150144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,4,128,1,float16,float16,0,0.384883189201355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,4,128,1,float16,fp8,0,0.3964240074157715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,4,128,1,fp8,fp8,0,0.394051194190979
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,8,128,1,float16,float16,0,0.3935103893280029
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,8,128,1,float16,fp8,0,0.39601759910583495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,8,128,1,fp8,fp8,0,0.3939120054244995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,96,128,1,float16,float16,0,0.3130928039550781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,96,128,1,float16,fp8,0,0.30190720558166506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,96,128,1,float16,fp8,0,0.5885551929473877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,96,128,1,fp8,fp8,0,0.29915680885314944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,1,128,1,float16,float16,0,0.20039360523223876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,1,128,1,float16,fp8,0,0.20522079467773438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,1,128,1,fp8,fp8,0,0.20292000770568847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,2,128,1,float16,float16,0,0.1996224045753479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,2,128,1,float16,fp8,0,0.20504319667816162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,2,128,1,fp8,fp8,0,0.20295679569244385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,4,128,1,float16,float16,0,0.2023632049560547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,4,128,1,float16,fp8,0,0.2048640012741089
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,4,128,1,fp8,fp8,0,0.20240800380706786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,8,128,1,float16,float16,0,0.2074496030807495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,8,128,1,float16,fp8,0,0.20513439178466797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,8,128,1,fp8,fp8,0,0.20271360874176025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,96,128,1,float16,float16,0,0.16637760400772095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,96,128,1,float16,fp8,0,0.15871520042419435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,96,128,1,fp8,fp8,0,0.15588480234146118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,1,128,1,float16,fp8,0,0.10890400409698486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,2,128,1,float16,float16,0,0.10787839889526367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,2,128,1,float16,fp8,0,0.10907679796218872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,2,128,1,fp8,fp8,0,0.10900000333786011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,4,128,1,float16,float16,0,0.10896159410476684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,4,128,1,float16,fp8,0,0.1090224027633667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,4,128,1,fp8,fp8,0,0.10897599458694458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,8,128,1,float16,float16,0,0.11075359582901001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,8,128,1,float16,fp8,0,0.10898560285568237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,8,128,1,fp8,fp8,0,0.10885440111160279
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,96,128,1,float16,float16,0,0.0906831979751587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,96,128,1,float16,fp8,0,0.0861024022102356
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,1,128,1,float16,float16,0,0.10779839754104614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,96,128,1,fp8,fp8,0,0.0851152002811432
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,1,128,1,fp8,fp8,0,0.05963360071182251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,2,128,1,float16,float16,0,0.05958880186080932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,2,128,1,float16,fp8,0,0.05955039858818054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,2,128,1,fp8,fp8,0,0.059520000219345094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,4,128,1,float16,float16,0,0.05980160236358643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,4,128,1,float16,fp8,0,0.059592002630233766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,4,128,1,fp8,fp8,0,0.059575998783111574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,8,128,1,float16,float16,0,0.06161440014839172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,8,128,1,float16,fp8,0,0.059648001194000246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,8,128,1,fp8,fp8,0,0.05968480110168457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,96,128,1,float16,float16,0,0.05151839852333069
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,96,128,1,float16,fp8,0,0.04762240052223206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,96,128,1,fp8,fp8,0,0.04806720018386841
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,1,128,1,float16,float16,0,0.03499839901924133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,1,128,1,float16,fp8,0,0.03503359854221344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,1,128,1,fp8,fp8,0,0.03511520028114319
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,2,128,1,float16,float16,0,0.034999999403953555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,1,128,1,float16,float16,0,0.05978879928588867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,2,128,1,float16,fp8,0,0.035120001435279845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,2,128,1,fp8,fp8,0,0.03596799969673157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,1,128,1,fp8,fp8,0,0.10692640542984008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,4,128,1,float16,float16,0,0.035104000568389894
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,4,128,1,float16,fp8,0,0.03650720119476318
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,4,128,1,fp8,fp8,0,0.035102400183677676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,8,128,1,float16,float16,0,0.03512159883975983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,8,128,1,float16,fp8,0,0.035521599650383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,8,128,1,fp8,fp8,0,0.03500800132751465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,96,128,1,float16,float16,0,0.026862400770187377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,96,128,1,float16,fp8,0,0.028923198580741882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,96,128,1,fp8,fp8,0,0.02693760097026825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,1,128,1,float16,float16,0,0.02128159999847412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,1,128,1,float16,fp8,0,0.022668799757957457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,1,128,1,fp8,fp8,0,0.020873600244522096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,2,128,1,float16,float16,0,0.021118399500846863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,2,128,1,float16,fp8,0,0.02274720072746277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,2,128,1,fp8,fp8,0,0.021592000126838685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,4,128,1,float16,float16,0,0.021624000370502473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,4,128,1,float16,fp8,0,0.02268480062484741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,8,128,1,float16,float16,0,0.022804799675941467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,8,128,1,float16,fp8,0,0.022536000609397887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,8,128,1,fp8,fp8,0,0.022700800001621245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,96,128,1,float16,float16,0,0.018947200477123262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,96,128,1,float16,fp8,0,0.020612800121307374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,1,128,1,float16,float16,0,0.016708800196647645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,1,128,1,float16,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,1,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,2,128,1,float16,float16,0,0.01652960032224655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,2,128,1,float16,fp8,0,0.016673600673675536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,2,128,1,fp8,fp8,0,0.01658399999141693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,4,128,1,float16,float16,0,0.016672000288963318
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,4,128,1,float16,fp8,0,0.01666080057621002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,4,128,1,fp8,fp8,0,0.016628800332546233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,8,128,1,float16,float16,0,0.01661120057106018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,8,128,1,float16,fp8,0,0.01658879965543747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,8,128,1,fp8,fp8,0,0.016606399416923524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,1,128,1,float16,fp8,0,0.059862399101257326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,96,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,96,128,1,float16,fp8,0,0.01465120017528534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,96,128,1,fp8,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,1,128,1,float16,float16,0,0.012649600207805634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,1,128,1,fp8,fp8,0,0.012768000364303589
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,2,128,1,float16,float16,0,0.012572799623012543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,4,128,1,fp8,fp8,0,0.02210559993982315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,2,128,1,fp8,fp8,0,0.012468799948692322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,4,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,4,128,1,float16,fp8,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,96,128,1,fp8,fp8,0,0.020585599541664123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,4,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,8,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,8,128,1,float16,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,8,128,1,fp8,fp8,0,0.012656000256538392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,1,128,1,float16,float16,0,0.5752336025238037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,1,128,1,float16,fp8,0,0.5897071838378907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,1,128,1,float16,fp8,0,0.012385600060224534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,1,128,1,fp8,fp8,0,0.5893871784210205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,2,128,1,float16,fp8,0,0.012668800354003907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,2,128,1,float16,float16,0,0.5757184028625488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,2,128,1,float16,fp8,0,0.5892047882080078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,2,128,1,fp8,fp8,0,0.5946368217468262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,4,128,1,float16,float16,0,0.5799007892608643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,4,128,1,float16,fp8,0,0.5881999969482422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,4,128,1,fp8,fp8,0,0.595030403137207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,8,128,1,float16,float16,0,0.5886176109313965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,8,128,1,float16,fp8,0,0.5881663799285889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,8,128,1,fp8,fp8,0,0.5943456172943116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,96,128,1,float16,float16,0,0.40950717926025393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,96,128,1,float16,fp8,0,0.3957056045532227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,1,128,1,float16,float16,0,0.29563839435577394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,96,128,1,fp8,fp8,0,0.3987807989120483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,1,128,1,float16,fp8,0,0.2998703956604004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,1,128,1,fp8,fp8,0,0.3001919984817505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,2,128,1,float16,float16,0,0.298471999168396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,2,128,1,float16,fp8,0,0.2997551918029785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,2,128,1,fp8,fp8,0,0.3001391887664795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,4,128,1,float16,float16,0,0.301313591003418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,4,128,1,fp8,fp8,0,0.29953598976135254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,4,128,1,float16,fp8,0,0.3019711971282959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,8,128,1,float16,float16,0,0.3012448072433472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,8,128,1,float16,fp8,0,0.30341761112213134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,8,128,1,fp8,fp8,0,0.2993520021438599
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,96,128,1,float16,float16,0,0.21484000682830812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,96,128,1,float16,fp8,0,0.20325119495391847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,1,128,1,float16,float16,0,0.15365920066833497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,96,128,1,fp8,fp8,0,0.20629758834838868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,1,128,1,fp8,fp8,0,0.15455520153045654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,2,128,1,float16,float16,0,0.1543007969856262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,2,128,1,float16,fp8,0,0.15770720243453978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,2,128,1,fp8,fp8,0,0.1549343943595886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,4,128,1,float16,float16,0,0.1548527956008911
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,4,128,1,float16,fp8,0,0.1582527995109558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,4,128,1,fp8,fp8,0,0.15410079956054687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,8,128,1,float16,float16,0,0.15780479907989503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,8,128,1,fp8,fp8,0,0.15429760217666627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,96,128,1,float16,float16,0,0.11157759428024291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,96,128,1,float16,fp8,0,0.10776000022888184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,96,128,1,fp8,fp8,0,0.10845439434051514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,1,128,1,float16,float16,0,0.08258240222930908
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,1,128,1,float16,fp8,0,0.08233759999275207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,1,128,1,fp8,fp8,0,0.08219199776649475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,2,128,1,float16,float16,0,0.08387680053710937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,2,128,1,float16,fp8,0,0.0822431981563568
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,2,128,1,fp8,fp8,0,0.08231840133666993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,4,128,1,float16,float16,0,0.08409280180931092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,4,128,1,float16,fp8,0,0.08229439854621887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,4,128,1,fp8,fp8,0,0.0823535978794098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,8,128,1,float16,float16,0,0.08457120060920716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,8,128,1,float16,fp8,0,0.08245919942855835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,8,128,1,fp8,fp8,0,0.0822655975818634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,96,128,1,float16,float16,0,0.06190400123596192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,96,128,1,float16,fp8,0,0.05897279977798462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,96,128,1,fp8,fp8,0,0.059592002630233766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,1,128,1,float16,float16,0,0.04551199972629547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,1,128,1,float16,fp8,0,0.04747520089149475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,1,128,1,fp8,fp8,0,0.04563679993152618
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,2,128,1,float16,float16,0,0.04545280039310455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,2,128,1,float16,fp8,0,0.04546239972114563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,2,128,1,fp8,fp8,0,0.047193598747253415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,4,128,1,float16,float16,0,0.04547840058803558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,4,128,1,float16,fp8,0,0.047152000665664676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,4,128,1,fp8,fp8,0,0.04581120014190674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,1,128,1,float16,fp8,0,0.15637600421905518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,8,128,1,float16,fp8,0,0.047163200378417966
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,8,128,1,fp8,fp8,0,0.04726400077342987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,96,128,1,float16,float16,0,0.033339199423789975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,96,128,1,fp8,fp8,0,0.03501439988613129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,1,128,1,float16,float16,0,0.028870400786399842
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,1,128,1,float16,fp8,0,0.02879360020160675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,8,128,1,float16,fp8,0,0.15731680393218994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,1,128,1,fp8,fp8,0,0.028865599632263185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,2,128,1,float16,fp8,0,0.028857600688934327
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,2,128,1,fp8,fp8,0,0.028867200016975403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,4,128,1,float16,float16,0,0.02894560098648071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,4,128,1,float16,fp8,0,0.028887999057769776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,4,128,1,fp8,fp8,0,0.028867200016975403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,8,128,1,float16,float16,0,0.028881600499153136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,8,128,1,float16,fp8,0,0.0288783997297287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,8,128,1,fp8,fp8,0,0.028891199827194215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,96,128,1,float16,float16,0,0.020678399503231047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,96,128,1,float16,fp8,0,0.020769600570201874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,96,128,1,fp8,fp8,0,0.020788800716400147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,1,128,1,float16,float16,0,0.018568000197410582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,1,128,1,float16,fp8,0,0.018688000738620758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,1,128,1,fp8,fp8,0,0.018748800456523895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,2,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,2,128,1,float16,fp8,0,0.01870400011539459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,2,128,1,fp8,fp8,0,0.01860959976911545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,8,128,1,float16,float16,0,0.046972799301147464
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,4,128,1,float16,float16,0,0.018638400733470915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,4,128,1,float16,fp8,0,0.01875839978456497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,4,128,1,fp8,fp8,0,0.018665599822998046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,8,128,1,float16,float16,0,0.01871200054883957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,8,128,1,float16,fp8,0,0.01870400011539459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,8,128,1,fp8,fp8,0,0.018668800592422485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,2,128,1,float16,float16,0,0.02890399992465973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,96,128,1,float16,float16,0,0.01666879951953888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,96,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,1,128,1,float16,float16,0,0.014612799882888794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,1,128,1,float16,fp8,0,0.014739200472831726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,1,128,1,fp8,fp8,0,0.014646400511264802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,2,128,1,float16,float16,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,2,128,1,float16,fp8,0,0.014550399780273438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,2,128,1,fp8,fp8,0,0.014585599303245544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,4,128,1,float16,float16,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,4,128,1,float16,fp8,0,0.014614400267601014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,4,128,1,fp8,fp8,0,0.014697599411010741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,8,128,1,float16,float16,0,0.014689600467681885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,8,128,1,float16,fp8,0,0.014691199362277984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,8,128,1,fp8,fp8,0,0.01459999978542328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,96,128,1,float16,float16,0,0.014727999269962311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,96,128,1,float16,fp8,0,0.0350959986448288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,96,128,1,float16,fp8,0,0.012555199861526489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,96,128,1,fp8,fp8,0,0.012703999876976013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,1,128,1,float16,fp8,0,0.012532800436019897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,1,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,2,128,1,float16,float16,0,0.01080000028014183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,2,128,1,float16,fp8,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,2,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,4,128,1,float16,float16,0,0.011271999776363372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,4,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,4,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,8,128,1,float16,float16,0,0.01252799928188324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,8,128,1,float16,fp8,0,0.012582400441169738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,8,128,1,fp8,fp8,0,0.012436799705028534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,1,128,1,float16,float16,0,0.49302082061767577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,1,128,1,float16,fp8,0,0.5066736221313477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,1,128,1,fp8,fp8,0,0.5039936065673828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,1,128,1,float16,float16,0,0.011048000305891037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,2,128,1,float16,fp8,0,0.5041488170623779
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,96,128,1,float16,fp8,0,0.01659359931945801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,2,128,1,fp8,fp8,0,0.5043168067932129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,4,128,1,float16,float16,0,0.493942403793335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,4,128,1,fp8,fp8,0,0.5040544033050537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,8,128,1,float16,float16,0,0.49894399642944337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,8,128,1,float16,fp8,0,0.5036687850952148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,96,128,1,float16,float16,0,0.3114415884017944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,8,128,1,fp8,fp8,0,0.5038352012634277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,96,128,1,float16,fp8,0,0.30554239749908446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,96,128,1,fp8,fp8,0,0.30560479164123533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,1,128,1,float16,float16,0,0.25341761112213135
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,1,128,1,float16,fp8,0,0.25803680419921876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,1,128,1,fp8,fp8,0,0.25828640460968016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,2,128,1,float16,float16,0,0.2532383918762207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,2,128,1,float16,fp8,0,0.25801920890808105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,2,128,1,fp8,fp8,0,0.2576479911804199
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,4,128,1,float16,float16,0,0.25579841136932374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,4,128,1,float16,fp8,0,0.2573040008544922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,4,128,1,fp8,fp8,0,0.25858559608459475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,8,128,1,float16,float16,0,0.256110405921936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,4,128,1,float16,fp8,0,0.5084847927093505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,8,128,1,float16,fp8,0,0.2571471929550171
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,8,128,1,fp8,fp8,0,0.25718719959259034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,96,128,1,float16,float16,0,0.16448479890823364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,96,128,1,float16,fp8,0,0.1580288052558899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,96,128,1,fp8,fp8,0,0.15790719985961915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,1,128,1,float16,float16,0,0.1347216010093689
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,1,128,1,float16,fp8,0,0.1326591968536377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,1,128,1,fp8,fp8,0,0.13260639905929567
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,2,128,1,float16,float16,0,0.13513760566711425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,2,128,1,float16,fp8,0,0.13294559717178345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,2,128,1,fp8,fp8,0,0.1329103946685791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,4,128,1,float16,float16,0,0.13572479486465455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,4,128,1,float16,fp8,0,0.13245760202407836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,4,128,1,fp8,fp8,0,0.1333791971206665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,8,128,1,float16,float16,0,0.13654400110244752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,8,128,1,fp8,fp8,0,0.1335584044456482
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,8,128,1,float16,fp8,0,0.1333456039428711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,96,128,1,float16,float16,0,0.0868943989276886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,96,128,1,float16,fp8,0,0.08429279923439026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,96,128,1,fp8,fp8,0,0.0841376006603241
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,1,128,1,float16,float16,0,0.07133600115776062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,1,128,1,float16,fp8,0,0.07196000218391418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,2,128,1,float16,float16,0,0.07129279971122741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,2,128,1,float16,float16,0,0.4961440086364746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,2,128,1,fp8,fp8,0,0.07211999893188477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,4,128,1,float16,float16,0,0.07219520211219788
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,4,128,1,float16,fp8,0,0.07215200066566467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,8,128,1,float16,float16,0,0.07238879799842834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,8,128,1,float16,fp8,0,0.07203999757766724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,8,128,1,fp8,fp8,0,0.0720848023891449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,96,128,1,float16,float16,0,0.04586719870567322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,96,128,1,float16,fp8,0,0.047539201378822324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,96,128,1,fp8,fp8,0,0.0474592000246048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,1,128,1,float16,float16,0,0.041280001401901245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,1,128,1,float16,fp8,0,0.04123519957065582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,1,128,1,fp8,fp8,0,0.04133439958095551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,2,128,1,float16,float16,0,0.041294398903846743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,2,128,1,float16,fp8,0,0.04115839898586273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,1,128,1,fp8,fp8,0,0.07120479941368103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,2,128,1,fp8,fp8,0,0.041140800714492796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,4,128,1,float16,float16,0,0.04036479890346527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,4,128,1,float16,fp8,0,0.04124000072479248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,4,128,1,fp8,fp8,0,0.07220320105552673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,4,128,1,fp8,fp8,0,0.04119040071964264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,8,128,1,float16,float16,0,0.041417598724365234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,8,128,1,float16,fp8,0,0.04108000099658966
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,8,128,1,fp8,fp8,0,0.04155679941177368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,96,128,1,float16,float16,0,0.026969599723815917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,96,128,1,float16,fp8,0,0.02911359965801239
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,96,128,1,fp8,fp8,0,0.028865599632263185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,1,128,1,float16,float16,0,0.02503199875354767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,1,128,1,float16,fp8,0,0.024846400320529937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,1,128,1,fp8,fp8,0,0.02499839961528778
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,2,128,1,float16,fp8,0,0.025051200389862062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,2,128,1,fp8,fp8,0,0.024991999566555022
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,4,128,1,float16,float16,0,0.025147199630737305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,4,128,1,float16,fp8,0,0.026804798841476442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,4,128,1,fp8,fp8,0,0.025817599892616273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,8,128,1,float16,float16,0,0.026009601354599
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,8,128,1,float16,fp8,0,0.026651200652122498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,8,128,1,fp8,fp8,0,0.0250575989484787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,2,128,1,float16,fp8,0,0.07204480171203613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,96,128,1,float16,fp8,0,0.018590399622917177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,96,128,1,fp8,fp8,0,0.01886879950761795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,1,128,1,float16,float16,0,0.016675199568271636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,1,128,1,float16,fp8,0,0.016627199947834015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,1,128,1,fp8,fp8,0,0.016705599427223206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,2,128,1,float16,float16,0,0.01666879951953888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,2,128,1,float16,float16,0,0.024851199984550477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,2,128,1,fp8,fp8,0,0.016804799437522888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,4,128,1,float16,float16,0,0.01672320067882538
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,4,128,1,float16,fp8,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,4,128,1,fp8,fp8,0,0.016708800196647645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,8,128,1,float16,float16,0,0.017393599450588226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,8,128,1,float16,fp8,0,0.016735999286174773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,8,128,1,fp8,fp8,0,0.01720159947872162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,96,128,1,float16,float16,0,0.016737599670886994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,96,128,1,fp8,fp8,0,0.014699199795722961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,96,128,1,float16,fp8,0,0.015246400237083435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,1,128,1,float16,float16,0,0.012590399384498597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,1,128,1,fp8,fp8,0,0.013307200372219085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,1,128,1,float16,fp8,0,0.015031999349594117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,96,128,1,float16,float16,0,0.018884800374507904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,2,128,1,float16,float16,0,0.012598399817943574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,2,128,1,fp8,fp8,0,0.01454080045223236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,2,128,1,float16,fp8,0,0.015222400426864624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,4,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,4,128,1,fp8,fp8,0,0.014849600195884705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,8,128,1,float16,fp8,0,0.014868800342082978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,8,128,1,fp8,fp8,0,0.014812800288200378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,96,128,1,float16,float16,0,0.013940800726413728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,96,128,1,float16,fp8,0,0.012603199481964112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,96,128,1,fp8,fp8,0,0.010835199803113937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,1,128,1,float16,float16,0,0.010838399827480315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,1,128,1,float16,fp8,0,0.010923200100660325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,1,128,1,fp8,fp8,0,0.010742399841547012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,2,128,1,float16,float16,0,0.010811199992895126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,2,128,1,float16,fp8,0,0.010751999914646149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,2,128,1,fp8,fp8,0,0.010705599933862687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,4,128,1,float16,float16,0,0.010667199641466141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,4,128,1,float16,fp8,0,0.010729599744081497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,4,128,1,float16,float16,0,0.01249919980764389
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,4,128,1,fp8,fp8,0,0.010790400207042694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,8,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,8,128,1,float16,fp8,0,0.010702399909496308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,8,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,1,128,1,float16,float16,0,0.4797584056854248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,1,128,1,float16,fp8,0,0.45787200927734373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,2,128,1,float16,fp8,0,0.016659200191497803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,2,128,1,float16,float16,0,0.4782447814941406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,1,128,1,fp8,fp8,0,0.4613743782043457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,2,128,1,float16,fp8,0,0.4556159973144531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,2,128,1,fp8,fp8,0,0.4577343940734863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,4,128,1,float16,float16,0,0.48081440925598146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,4,128,1,float16,fp8,0,0.45469279289245607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,4,128,1,fp8,fp8,0,0.4571968078613281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,8,128,1,float16,fp8,0,0.45487360954284667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,8,128,1,float16,float16,0,0.4834911823272705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,8,128,1,fp8,fp8,0,0.45719518661499026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,96,128,1,float16,fp8,0,0.26040799617767335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,8,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,96,128,1,fp8,fp8,0,0.25855040550231934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,1,128,1,float16,float16,0,0.24744319915771484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,1,128,1,float16,fp8,0,0.231825590133667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,1,128,1,fp8,fp8,0,0.23289599418640136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,2,128,1,float16,float16,0,0.2478559970855713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,2,128,1,float16,fp8,0,0.23188319206237792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,2,128,1,fp8,fp8,0,0.23331999778747559
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,4,128,1,float16,float16,0,0.2487855911254883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,4,128,1,float16,fp8,0,0.23186719417572021
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,4,128,1,fp8,fp8,0,0.2323456048965454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,8,128,1,float16,fp8,0,0.23173279762268068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,8,128,1,fp8,fp8,0,0.23274240493774415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,96,128,1,float16,float16,0,0.1412160038948059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,96,128,1,float16,fp8,0,0.13358559608459472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,96,128,1,fp8,fp8,0,0.13265279531478882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,1,128,1,float16,float16,0,0.12808640003204347
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,1,128,1,float16,fp8,0,0.11911200284957886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,1,128,1,fp8,fp8,0,0.12081600427627563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,2,128,1,float16,float16,0,0.12853280305862427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,2,128,1,float16,fp8,0,0.12114559412002564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,96,128,1,float16,float16,0,0.2741935968399048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,4,128,1,float16,float16,0,0.1277008056640625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,4,128,1,float16,fp8,0,0.11928800344467164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,4,128,1,fp8,fp8,0,0.11951520442962646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,8,128,1,float16,float16,0,0.12741600275039672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,8,128,1,float16,fp8,0,0.1192896008491516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,8,128,1,float16,float16,0,0.24941599369049072
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,8,128,1,fp8,fp8,0,0.11912959814071655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,96,128,1,float16,fp8,0,0.06995199918746949
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,96,128,1,fp8,fp8,0,0.07010080218315125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,1,128,1,float16,float16,0,0.06843039989471436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,1,128,1,float16,fp8,0,0.06403040289878845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,1,128,1,fp8,fp8,0,0.06409119963645935
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,2,128,1,float16,float16,0,0.06863200068473815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,2,128,1,fp8,fp8,0,0.12142399549484253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,2,128,1,float16,fp8,0,0.0638256013393402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,4,128,1,float16,float16,0,0.06848000288009644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,4,128,1,float16,fp8,0,0.0637503981590271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,4,128,1,fp8,fp8,0,0.06394400000572205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,8,128,1,float16,float16,0,0.0696560025215149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,8,128,1,float16,fp8,0,0.06411679983139038
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,8,128,1,fp8,fp8,0,0.06397759914398193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,96,128,1,float16,float16,0,0.04192480146884918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,96,128,1,float16,fp8,0,0.04122079908847809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,96,128,1,float16,float16,0,0.07567359805107117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,96,128,1,fp8,fp8,0,0.03945600092411041
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,1,128,1,fp8,fp8,0,0.03702400028705597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,2,128,1,float16,float16,0,0.03908160030841827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,2,128,1,float16,fp8,0,0.037031999230384825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,2,128,1,fp8,fp8,0,0.037057599425315856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,4,128,1,float16,float16,0,0.03921439945697784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,4,128,1,float16,fp8,0,0.03708159923553467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,4,128,1,fp8,fp8,0,0.03704639971256256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,8,128,1,float16,float16,0,0.03912000060081482
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,8,128,1,float16,fp8,0,0.03717280030250549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,2,128,1,fp8,fp8,0,0.06580960154533386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,8,128,1,fp8,fp8,0,0.03708640038967133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,96,128,1,float16,float16,0,0.02698880136013031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,96,128,1,float16,fp8,0,0.02493920028209686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,96,128,1,fp8,fp8,0,0.02489439994096756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,1,128,1,float16,float16,0,0.024820800125598907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,1,128,1,float16,fp8,0,0.024795199930667877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,1,128,1,fp8,fp8,0,0.022856000065803527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,2,128,1,float16,float16,0,0.024897600710391998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,2,128,1,float16,fp8,0,0.02284799963235855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,2,128,1,fp8,fp8,0,0.024710400402545928
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,4,128,1,float16,float16,0,0.024771200120449068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,4,128,1,float16,fp8,0,0.02438880056142807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,8,128,1,float16,float16,0,0.024881599843502043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,8,128,1,float16,fp8,0,0.02473440021276474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,8,128,1,fp8,fp8,0,0.024675199389457704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,1,128,1,float16,float16,0,0.03915840089321136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,96,128,1,float16,float16,0,0.018681600689888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,96,128,1,float16,fp8,0,0.016568000614643096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,96,128,1,fp8,fp8,0,0.01679999977350235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,1,128,1,float16,float16,0,0.01652960032224655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,1,128,1,float16,fp8,0,0.016732800006866454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,1,128,1,fp8,fp8,0,0.016527999937534333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,2,128,1,float16,fp8,0,0.01655679941177368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,2,128,1,fp8,fp8,0,0.01664000004529953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,4,128,1,float16,float16,0,0.016569599509239197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,4,128,1,float16,fp8,0,0.01655679941177368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,4,128,1,fp8,fp8,0,0.01656319946050644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,8,128,1,float16,float16,0,0.016551999747753142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,8,128,1,float16,fp8,0,0.016550399363040924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,8,128,1,fp8,fp8,0,0.016519999504089354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,96,128,1,float16,float16,0,0.014567999541759491
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,96,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,96,128,1,fp8,fp8,0,0.01252640038728714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,1,128,1,float16,float16,0,0.012486399710178375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,1,128,1,float16,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,1,128,1,fp8,fp8,0,0.012467200309038163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,4,128,1,fp8,fp8,0,0.022788800299167633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,2,128,1,float16,float16,0,0.012591999769210816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,2,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,2,128,1,fp8,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,4,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,4,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,4,128,1,fp8,fp8,0,0.012740799784660339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,8,128,1,float16,float16,0,0.012601600587368011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,8,128,1,float16,fp8,0,0.01249919980764389
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,8,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,96,128,1,float16,float16,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,96,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,1,128,1,float16,fp8,0,0.03725599944591522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,96,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,2,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,1,128,1,fp8,fp8,0,0.010704000294208527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,2,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,2,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,4,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,4,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,8,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,8,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,8,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,1,128,1,float16,float16,0,0.47035040855407717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,1,128,1,float16,fp8,0,0.010649599879980088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,2,128,1,float16,float16,0,0.016700799763202667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,4,128,1,float16,fp8,0,0.010552000254392624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,1,128,1,float16,fp8,0,0.44271039962768555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,1,128,1,fp8,fp8,0,0.43990559577941896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,2,128,1,float16,float16,0,0.4699295997619629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,2,128,1,float16,fp8,0,0.4427840232849121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,2,128,1,fp8,fp8,0,0.43948798179626464
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,4,128,1,float16,float16,0,0.4698783874511719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,4,128,1,float16,fp8,0,0.4427840232849121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,4,128,1,fp8,fp8,0,0.44009599685668943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,0,0.46997761726379395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,0,0.44268321990966797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,8,128,1,fp8,fp8,0,0.44041919708251953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,96,128,1,float16,float16,0,0.24223840236663818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,96,128,1,float16,fp8,0,0.22804479598999022
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,96,128,1,fp8,fp8,0,0.22598400115966796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,1,128,1,float16,float16,0,0.23993439674377443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,1,128,1,float16,fp8,0,0.22602720260620118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,2,128,1,float16,float16,0,0.24012479782104493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,2,128,1,float16,fp8,0,0.22621920108795165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,4,128,1,float16,float16,0,0.24234240055084227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,4,128,1,float16,fp8,0,0.22631680965423584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,4,128,1,fp8,fp8,0,0.22385120391845703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,0,0.24256160259246826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,0,0.2238624095916748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,96,128,1,float16,float16,0,0.12821279764175414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,8,128,1,fp8,fp8,0,0.22649600505828857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,96,128,1,float16,fp8,0,0.11747039556503296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,96,128,1,fp8,fp8,0,0.11926079988479614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,1,128,1,float16,float16,0,0.12563199996948243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,1,128,1,float16,fp8,0,0.11877119541168213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,1,128,1,fp8,fp8,0,0.11727039813995362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,2,128,1,float16,fp8,0,0.11726880073547363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,2,128,1,fp8,fp8,0,0.11709599494934082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,4,128,1,float16,float16,0,0.12562559843063353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,2,128,1,fp8,fp8,0,0.22358880043029786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,4,128,1,float16,fp8,0,0.11712160110473632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,4,128,1,fp8,fp8,0,0.11718560457229614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,0,0.11697599887847901
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,8,128,1,fp8,fp8,0,0.1171887993812561
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,96,128,1,float16,float16,0,0.06988000273704528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,96,128,1,float16,fp8,0,0.06370880007743836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,96,128,1,fp8,fp8,0,0.06370880007743836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,1,128,1,float16,float16,0,0.06790080070495605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,1,128,1,float16,fp8,0,0.06371200084686279
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,1,128,1,fp8,fp8,0,0.06368640065193176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,2,128,1,float16,float16,0,0.0677456021308899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,2,128,1,float16,fp8,0,0.06368640065193176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,2,128,1,float16,float16,0,0.12766079902648925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,2,128,1,fp8,fp8,0,0.06378080248832703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,4,128,1,float16,float16,0,0.06799359917640686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,4,128,1,float16,fp8,0,0.06362400054931641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,0,0.06796960234642029
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,1,128,1,fp8,fp8,0,0.22522239685058593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,0,0.06367520093917847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,8,128,1,fp8,fp8,0,0.06384639739990235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,96,128,1,float16,float16,0,0.0412559986114502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,96,128,1,float16,fp8,0,0.03714720010757446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,96,128,1,fp8,fp8,0,0.037062400579452516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,1,128,1,float16,float16,0,0.03927040100097656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,1,128,1,fp8,fp8,0,0.037115201354026794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,0,0.12562719583511353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,2,128,1,float16,float16,0,0.03924480080604553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,2,128,1,float16,fp8,0,0.037036800384521486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,2,128,1,fp8,fp8,0,0.03716000020503998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,4,128,1,float16,float16,0,0.03916000127792359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,4,128,1,float16,fp8,0,0.03714079856872558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,4,128,1,fp8,fp8,0,0.03705599904060364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,0,0.039166399836540224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,8,128,1,fp8,fp8,0,0.03712959885597229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,96,128,1,float16,float16,0,0.02701599895954132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,96,128,1,float16,fp8,0,0.024771200120449068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,96,128,1,fp8,fp8,0,0.024531200528144836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,1,128,1,float16,float16,0,0.024937599897384644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,1,128,1,float16,fp8,0,0.022910399734973906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,1,128,1,fp8,fp8,0,0.022867199778556824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,2,128,1,float16,float16,0,0.024905599653720856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,2,128,1,float16,fp8,0,0.02295999974012375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,4,128,1,fp8,fp8,0,0.06382240056991577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,2,128,1,fp8,fp8,0,0.022950400412082673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,4,128,1,float16,float16,0,0.024775999784469604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,4,128,1,float16,fp8,0,0.02292799949645996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,4,128,1,fp8,fp8,0,0.024422399699687958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,0,0.024806399643421174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,0,0.024249599874019624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,8,128,1,fp8,fp8,0,0.02287999987602234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,1,128,1,float16,fp8,0,0.03708159923553467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,96,128,1,float16,float16,0,0.018769599497318268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,96,128,1,fp8,fp8,0,0.016599999368190767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,1,128,1,float16,float16,0,0.01671680063009262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,1,128,1,fp8,fp8,0,0.016633599996566772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,2,128,1,float16,float16,0,0.016715200245380403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,2,128,1,float16,fp8,0,0.016519999504089354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,0,0.03710559904575348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,2,128,1,fp8,fp8,0,0.016579200327396394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,4,128,1,float16,float16,0,0.016627199947834015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,4,128,1,float16,fp8,0,0.016599999368190767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,4,128,1,fp8,fp8,0,0.016543999314308167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,0,0.016652800142765045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,0,0.016527999937534333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,8,128,1,fp8,fp8,0,0.015110400319099427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,96,128,1,float16,float16,0,0.01478559970855713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,96,128,1,float16,fp8,0,0.012593600153923034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,96,128,1,fp8,fp8,0,0.012612800300121307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,1,128,1,float16,float16,0,0.012960000336170197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,1,128,1,float16,fp8,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,1,128,1,fp8,fp8,0,0.012548799812793731
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,2,128,1,float16,float16,0,0.012571200728416443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,2,128,1,float16,fp8,0,0.01268640011548996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,2,128,1,fp8,fp8,0,0.012600000202655792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,4,128,1,float16,float16,0,0.01305440068244934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,4,128,1,float16,fp8,0,0.012727999687194824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,4,128,1,fp8,fp8,0,0.012636800110340119
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,0,0.014107200503349304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,1,128,1,float16,fp8,0,0.015435199439525604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,0,0.012580800056457519
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,8,128,1,fp8,fp8,0,0.012580800056457519
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,96,128,1,float16,float16,0,0.01255359947681427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,96,128,1,float16,fp8,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,96,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,1,128,1,float16,float16,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,1,128,1,fp8,fp8,0,0.010567999631166457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,2,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,2,128,1,float16,fp8,0,0.010590399801731109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,2,128,1,fp8,fp8,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,4,128,1,float16,float16,0,0.010625600069761276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,4,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,4,128,1,fp8,fp8,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,8,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,96,128,1,float16,fp8,0,0.01677920073270798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,1,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,1,128,1,fp8,fp8,0,22.411967468261718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,fp8,0,25.174659729003906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,float16,0,38.121099853515624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,float16,0,40.91609497070313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,fp8,0,24.249046325683594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,2,128,1,fp8,fp8,0,26.329898071289062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,fp8,0,26.18759765625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,4,128,1,fp8,fp8,0,24.308181762695312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,float16,0,39.17619934082031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,fp8,0,26.361328125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,float16,0,42.35333251953125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,8,128,1,fp8,fp8,0,24.476643371582032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,fp8,0,12.861582946777343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,64,128,1,fp8,fp8,0,12.392486572265625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,float16,0,19.796060180664064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,fp8,0,12.965028381347656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,1,128,1,fp8,fp8,0,12.363022613525391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,fp8,0,13.456221008300782
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,2,128,1,fp8,fp8,0,12.752210998535157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,float16,0,19.80657196044922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,fp8,0,12.635633850097657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,4,128,1,fp8,fp8,0,11.971463775634765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,float16,0,21.426838684082032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,fp8,0,12.270081329345704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,8,128,1,fp8,fp8,0,12.563673400878907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,float16,0,21.239533996582033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,fp8,0,6.523709106445312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,64,128,1,fp8,fp8,0,6.227659225463867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,fp8,0,5.737209701538086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,float16,0,10.468377685546875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,1,128,1,fp8,fp8,0,5.9772895812988285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,fp8,0,6.125537490844726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,float16,0,10.28997573852539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,2,128,1,fp8,fp8,0,6.305827331542969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,float16,0,9.950497436523438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,fp8,0,6.077257537841797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,4,128,1,fp8,fp8,0,6.1833454132080075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,float16,0,9.847624206542969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,fp8,0,6.234067153930664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,8,128,1,fp8,fp8,0,6.276283264160156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,fp8,0,2.9770511627197265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,64,128,1,fp8,fp8,0,2.9667919158935545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,float16,0,4.219206237792969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,fp8,0,2.969326400756836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,1,128,1,fp8,fp8,0,2.805835151672363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,float16,0,5.015963363647461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,fp8,0,2.9701967239379883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,2,128,1,fp8,fp8,0,2.9156816482543944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,float16,0,4.194094467163086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,fp8,0,3.2176864624023436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,4,128,1,fp8,fp8,0,3.0647632598876955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,float16,0,4.760779190063476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,fp8,0,3.0003664016723635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,float16,0,3.694136047363281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,8,128,1,fp8,fp8,0,3.162447929382324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,float16,0,9.247110748291016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,float16,0,18.985316467285156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,fp8,0,15.209440612792969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,1,128,1,fp8,fp8,0,12.775077056884765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,float16,0,23.312184143066407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,fp8,0,15.46474609375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,2,128,1,fp8,fp8,0,14.065692138671874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,float16,0,22.582936096191407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,fp8,0,15.515121459960938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,float16,0,23.933033752441407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,4,128,1,fp8,fp8,0,13.788642883300781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,float16,0,22.521531677246095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,fp8,0,15.590184020996094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,8,128,1,fp8,fp8,0,14.903500366210938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,fp8,0,6.8711090087890625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,float16,0,11.160755157470703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,64,128,1,fp8,fp8,0,8.010208129882812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,fp8,0,6.546873474121094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,1,128,1,fp8,fp8,0,6.336000061035156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,float16,0,12.216512298583984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,fp8,0,6.619640350341797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,2,128,1,fp8,fp8,0,6.784060668945313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,float16,0,12.496646118164062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,fp8,0,6.9572593688964846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,float16,0,12.313346862792969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,4,128,1,fp8,fp8,0,6.964742279052734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,float16,0,5.517668914794922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,fp8,0,6.896292877197266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,8,128,1,fp8,fp8,0,7.262060546875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,float16,0,12.560441589355468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,fp8,0,3.6469470977783205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,64,128,1,fp8,fp8,0,3.442407989501953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,fp8,0,3.546651077270508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,float16,0,5.946460723876953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,1,128,1,fp8,fp8,0,3.288412857055664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,float16,0,5.1218830108642575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,fp8,0,3.1679712295532227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,2,128,1,fp8,fp8,0,3.5971969604492187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,float16,0,4.800352096557617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,fp8,0,3.3652976989746093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,4,128,1,fp8,fp8,0,3.4084239959716798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,float16,0,2.2772991180419924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,fp8,0,3.1862688064575195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,8,128,1,fp8,fp8,0,3.269900894165039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,float16,0,6.033414459228515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,64,128,1,fp8,fp8,0,1.7804304122924806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,fp8,0,2.065724754333496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,float16,0,2.237603187561035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,fp8,0,1.9863599777221679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,1,128,1,fp8,fp8,0,1.5629424095153808
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,float16,0,2.2675472259521485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,fp8,0,2.1826000213623047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,float16,0,2.051851272583008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,4,128,1,fp8,fp8,0,1.6090192794799805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,fp8,0,2.1682479858398436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,2,128,1,fp8,fp8,0,1.6381824493408204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,float16,0,2.506692886352539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,fp8,0,1.5809760093688965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,8,128,1,fp8,fp8,0,1.7775808334350587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,1,128,1,fp8,fp8,0,9.497583770751953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,fp8,0,9.66131820678711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,float16,0,16.143536376953126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,float16,0,17.67888641357422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,fp8,0,9.583159637451171
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,2,128,1,fp8,fp8,0,9.947792053222656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,fp8,0,10.329966735839843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,4,128,1,fp8,fp8,0,9.87029571533203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,float16,0,17.635594177246094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,float16,0,16.409339904785156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,fp8,0,10.62747802734375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,8,128,1,fp8,fp8,0,9.733999633789063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,float16,0,9.083372497558594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,fp8,0,4.944753646850586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,64,128,1,fp8,fp8,0,5.697079849243164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,1,128,1,fp8,fp8,0,4.657223892211914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,fp8,0,4.851534271240235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,float16,0,7.401350402832032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,fp8,0,4.6252094268798825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,2,128,1,fp8,fp8,0,5.171281433105468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,float16,0,8.99170379638672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,float16,0,7.7648979187011715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,fp8,0,4.717444610595703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,4,128,1,fp8,fp8,0,5.300616073608398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,fp8,0,4.678734588623047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,8,128,1,fp8,fp8,0,4.8413551330566404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,float16,0,4.09155502319336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,float16,0,8.619116973876952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,fp8,0,2.441524887084961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,64,128,1,fp8,fp8,0,2.4149999618530273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,fp8,0,2.554020881652832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,1,128,1,fp8,fp8,0,2.3127296447753904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,float16,0,4.301350402832031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,float16,0,3.6496208190917967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,fp8,0,2.200721549987793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,2,128,1,fp8,fp8,0,2.217207908630371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,4,128,1,fp8,fp8,0,2.5317840576171875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,fp8,0,2.4529087066650392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,float16,0,4.289076614379883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,float16,0,3.0695167541503907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,float16,0,2.15655517578125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,8,128,1,fp8,fp8,0,2.284796714782715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,fp8,0,1.3944175720214844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,fp8,0,2.5202800750732424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,fp8,0,1.1249199867248536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,float16,0,1.2734432220458984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,1,128,1,fp8,fp8,0,1.3844207763671874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,64,128,1,fp8,fp8,0,1.519923210144043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,float16,0,1.2932592391967774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,fp8,0,1.256158447265625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,2,128,1,fp8,fp8,0,1.2781968116760254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,float16,0,1.322932815551758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,fp8,0,1.1512944221496582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,4,128,1,fp8,fp8,0,1.1484880447387695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,float16,0,1.3166671752929688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,fp8,0,1.149180793762207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,8,128,1,fp8,fp8,0,1.117246437072754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,1,128,1,fp8,fp8,0,12.833311462402344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,fp8,0,12.840293884277344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,float16,0,23.359286499023437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,float16,0,20.969607543945312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,fp8,0,13.19500732421875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,2,128,1,fp8,fp8,0,12.906785583496093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,fp8,0,14.490568542480469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,4,128,1,fp8,fp8,0,13.148713684082031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,float16,0,23.1890625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,float16,0,21.298538208007812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,8,128,1,fp8,fp8,0,13.211534118652343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,fp8,0,14.007662963867187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,fp8,0,6.560768127441406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,float16,0,12.107672119140625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,64,128,1,fp8,fp8,0,6.870458984375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,fp8,0,5.971260833740234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,1,128,1,fp8,fp8,0,6.5636238098144535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,float16,0,11.422513580322265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,fp8,0,6.2973888397216795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,float16,0,10.42229766845703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,2,128,1,fp8,fp8,0,6.807633972167968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,fp8,0,6.286719894409179
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,float16,0,11.383805084228516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,4,128,1,fp8,fp8,0,6.245323181152344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,fp8,0,6.434062194824219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,float16,0,11.660135650634766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,8,128,1,fp8,fp8,0,6.682288360595703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,fp8,0,3.190412712097168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,float16,0,5.41918716430664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,64,128,1,fp8,fp8,0,3.5561454772949217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,float16,0,4.975247955322265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,fp8,0,2.985592079162598
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,1,128,1,fp8,fp8,0,2.7858943939208984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,2,128,1,fp8,fp8,0,2.9407583236694337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,float16,0,4.693511962890625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,fp8,0,3.274391937255859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,float16,0,4.551715087890625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,fp8,0,2.925359916687012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,4,128,1,fp8,fp8,0,3.4774383544921874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,float16,0,5.198996734619141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,fp8,0,2.847760009765625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,8,128,1,fp8,fp8,0,3.159823989868164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,float16,0,2.0234048843383787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,fp8,0,1.8532272338867188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,64,128,1,fp8,fp8,0,1.5282943725585938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,float16,0,1.6437664031982422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,fp8,0,1.7975408554077148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,1,128,1,fp8,fp8,0,1.5299039840698243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,float16,0,1.6890975952148437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,fp8,0,1.6426015853881837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,2,128,1,fp8,fp8,0,1.621900749206543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,float16,0,2.0236719131469725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,fp8,0,1.802574348449707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,4,128,1,fp8,fp8,0,1.6312623977661134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,float16,0,1.7513904571533203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,float16,0,0.9434176445007324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,8,128,1,fp8,fp8,0,1.428659152984619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,fp8,0,1.7413215637207031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,fp8,0,1.0372336387634278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,64,128,1,fp8,fp8,0,0.8828288078308105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,float16,0,0.8549023628234863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,fp8,0,0.8935855865478516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,1,128,1,fp8,fp8,0,0.8668160438537598
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,float16,0,0.8886256217956543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,fp8,0,0.7953711986541748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,2,128,1,fp8,fp8,0,0.7899600028991699
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,float16,0,0.9056336402893066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,fp8,0,0.7689151763916016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,4,128,1,fp8,fp8,0,0.817801570892334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,float16,0,0.8696864128112793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,fp8,0,0.7608751773834228
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,8,128,1,fp8,fp8,0,0.7710927963256836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,fp8,0,8.039036560058594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,1,128,1,fp8,fp8,0,8.104483032226563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,float16,0,12.189096069335937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,float16,0,11.883735656738281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,fp8,0,8.093863677978515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,2,128,1,fp8,fp8,0,8.127648162841798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,fp8,0,7.310113525390625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,float16,0,12.17330551147461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,4,128,1,fp8,fp8,0,8.168593597412109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,fp8,0,7.465340423583984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,float16,0,12.8531005859375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,8,128,1,fp8,fp8,0,7.376932525634766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,float16,0,7.250788879394531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,fp8,0,3.7986129760742187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,64,128,1,fp8,fp8,0,4.153164672851562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,1,128,1,fp8,fp8,0,3.460076904296875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,fp8,0,3.8383342742919924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,float16,0,5.786198425292969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,float16,0,5.837126541137695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,fp8,0,3.620832061767578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,2,128,1,fp8,fp8,0,3.919851303100586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,4,128,1,fp8,fp8,0,3.5553279876708985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,float16,0,5.954636764526367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,fp8,0,3.8544960021972656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,float16,0,6.462889862060547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,fp8,0,3.6619983673095704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,8,128,1,fp8,fp8,0,3.9864959716796875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,fp8,0,2.000993537902832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,float16,0,2.9234256744384766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,64,128,1,fp8,fp8,0,1.8714624404907227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,float16,0,1.9379024505615234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,fp8,0,1.8539056777954102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,1,128,1,fp8,fp8,0,1.6181184768676757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,float16,0,1.9008623123168946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,2,128,1,fp8,fp8,0,1.6276527404785157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,fp8,0,2.3127264022827148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,float16,0,1.9338495254516601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,fp8,0,2.874062347412109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,4,128,1,fp8,fp8,0,1.8558944702148437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,float16,0,1.9261760711669922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,fp8,0,2.009355163574219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,float16,0,1.0723888397216796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,8,128,1,fp8,fp8,0,1.6599536895751954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,fp8,0,1.3794480323791505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,float16,0,1.457145595550537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,fp8,0,0.9670991897583008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,1,128,1,fp8,fp8,0,0.9026592254638672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,fp8,0,0.8860367774963379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,float16,0,1.3800512313842774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,float16,0,0.9806575775146484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,fp8,0,1.1156944274902343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,4,128,1,fp8,fp8,0,0.8635312080383301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,float16,0,1.100108814239502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,fp8,0,0.8706928253173828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,float16,0,0.5822607994079589
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,2,128,1,fp8,fp8,0,0.8582752227783204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,8,128,1,fp8,fp8,0,1.172475242614746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,fp8,0,0.5082255840301514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,float16,0,0.5272416114807129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,fp8,0,0.5104383945465087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,1,128,1,fp8,fp8,0,0.4606319904327393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,64,128,1,fp8,fp8,0,0.9310400009155273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,float16,0,0.5719312191009521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,fp8,0,0.46166081428527833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,2,128,1,fp8,fp8,0,0.4563119888305664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,float16,0,0.5599152088165283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,fp8,0,0.45957279205322266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,4,128,1,fp8,fp8,0,0.45587520599365233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,fp8,0,0.4582479953765869
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,float16,0,0.533353614807129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,8,128,1,fp8,fp8,0,0.4541056156158447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,64,128,1,fp8,fp8,0,0.5076015949249267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,1,128,1,fp8,fp8,0,6.444478607177734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,float16,0,11.607161712646484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,float16,0,9.917515563964844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,fp8,0,7.103392028808594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,fp8,0,6.481390380859375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,fp8,0,6.901598358154297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,2,128,1,fp8,fp8,0,7.322763061523437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,4,128,1,fp8,fp8,0,6.60675048828125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,float16,0,10.48539047241211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,fp8,0,7.862477111816406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,float16,0,6.306284713745117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,8,128,1,fp8,fp8,0,7.236271667480469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,float16,0,11.540564727783202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,fp8,0,3.9691600799560547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,64,128,1,fp8,fp8,0,3.8069694519042967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,fp8,0,3.346004867553711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,float16,0,5.428955078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,1,128,1,fp8,fp8,0,3.3948192596435547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,float16,0,5.26246223449707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,2,128,1,fp8,fp8,0,3.197599983215332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,fp8,0,4.04429931640625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,float16,0,5.881735992431641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,fp8,0,3.1820703506469727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,4,128,1,fp8,fp8,0,3.4284526824951174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,float16,0,4.730580902099609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,float16,0,2.521060752868652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,fp8,0,3.7039409637451173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,8,128,1,fp8,fp8,0,3.2607872009277346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,fp8,0,2.208196830749512
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,64,128,1,fp8,fp8,0,2.198771286010742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,fp8,0,1.6436767578125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,float16,0,2.422480010986328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,1,128,1,fp8,fp8,0,1.7703903198242188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,fp8,0,1.5750911712646485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,float16,0,1.8890447616577148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,2,128,1,fp8,fp8,0,2.065310478210449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,float16,0,1.9614576339721679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,fp8,0,1.553321647644043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,4,128,1,fp8,fp8,0,1.9501823425292968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,8,128,1,fp8,fp8,0,1.6058351516723632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,fp8,0,2.109129524230957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,float16,0,2.609932708740234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,float16,0,1.104145622253418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,fp8,0,0.9632767677307129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,64,128,1,fp8,fp8,0,1.085908794403076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,fp8,0,1.1360511779785156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,float16,0,1.4136159896850586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,1,128,1,fp8,fp8,0,0.8050592422485352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,float16,0,0.9183584213256836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,fp8,0,0.8310832023620606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,2,128,1,fp8,fp8,0,1.160142421722412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,float16,0,0.901460838317871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,fp8,0,0.9142560005187989
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,4,128,1,fp8,fp8,0,0.7885039806365967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,float16,0,0.9426848411560058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,8,128,1,fp8,fp8,0,0.7946288108825683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,fp8,0,0.956116771697998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,float16,0,0.6014927864074707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,fp8,0,0.49830241203308107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,64,128,1,fp8,fp8,0,0.5062655925750732
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,float16,0,0.5756351947784424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,fp8,0,0.43083038330078127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,fp8,0,0.41992640495300293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,float16,0,0.5778416156768799
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,2,128,1,fp8,fp8,0,0.4204400062561035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,float16,0,0.5436319828033447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,fp8,0,0.4206223964691162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,4,128,1,fp8,fp8,0,0.520475196838379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,float16,0,0.47648000717163086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,fp8,0,0.4519040107727051
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,8,128,1,fp8,fp8,0,0.4202256202697754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,float16,0,0.3013551950454712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,fp8,0,0.2639008045196533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,64,128,1,fp8,fp8,0,0.2670703887939453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,float16,0,0.26026880741119385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,fp8,0,0.23421120643615723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,float16,0,0.26107358932495117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,fp8,0,0.23373761177062988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,2,128,1,fp8,fp8,0,0.23253118991851807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,float16,0,0.26339359283447267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,fp8,0,0.23162240982055665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,4,128,1,fp8,fp8,0,0.23304638862609864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,float16,0,0.2636656045913696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,fp8,0,0.23356959819793702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,8,128,1,fp8,fp8,0,0.23284320831298827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,1,128,1,fp8,fp8,0,0.4329728126525879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,1,128,1,fp8,fp8,0,0.23401279449462892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,fp8,0,3.7739727020263674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,float16,0,5.749863815307617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,1,128,1,fp8,fp8,0,3.8754974365234376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,float16,0,5.422268676757812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,fp8,0,3.802529525756836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,2,128,1,fp8,fp8,0,4.132555389404297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,float16,0,6.162830352783203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,fp8,0,3.80035514831543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,4,128,1,fp8,fp8,0,3.791281509399414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,float16,0,5.4220928192138675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,float16,0,2.986067199707031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,8,128,1,fp8,fp8,0,3.8528785705566406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,fp8,0,3.911296081542969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,float16,0,2.2155248641967775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,fp8,0,2.322007942199707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,64,128,1,fp8,fp8,0,2.2134031295776366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,fp8,0,1.8430383682250977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,1,128,1,fp8,fp8,0,2.074860763549805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,float16,0,2.9944831848144533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,fp8,0,2.052817535400391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,2,128,1,fp8,fp8,0,1.8699071884155274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,float16,0,2.222281646728516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,fp8,0,2.301395225524902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,4,128,1,fp8,fp8,0,1.8279695510864258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,fp8,0,1.915947151184082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,float16,0,2.784547233581543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,float16,0,1.3032959938049316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,8,128,1,fp8,fp8,0,2.074996757507324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,fp8,0,1.258199977874756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,float16,0,1.1807536125183105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,fp8,0,0.9824975967407227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,64,128,1,fp8,fp8,0,1.7677871704101562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,1,128,1,fp8,fp8,0,1.0486144065856933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,2,128,1,fp8,fp8,0,0.9308095932006836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,fp8,0,1.1108719825744628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,float16,0,1.0483903884887695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,fp8,0,0.9289471626281738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,4,128,1,fp8,fp8,0,1.0551983833312988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,float16,0,1.1424752235412599
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,fp8,0,0.9279264450073242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,8,128,1,fp8,fp8,0,0.9432559967041015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,float16,0,0.7684400081634521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,fp8,0,0.6009888172149658
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,64,128,1,fp8,fp8,0,0.6820703983306885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,fp8,0,0.4970287799835205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,float16,0,1.4817168235778808
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,float16,0,0.8420592308044433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,1,128,1,fp8,fp8,0,0.5287888050079346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,float16,0,0.5369584083557128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,fp8,0,0.4955455780029297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,float16,0,0.5501391887664795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,2,128,1,fp8,fp8,0,0.6332655906677246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,fp8,0,0.4860720157623291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,4,128,1,fp8,fp8,0,0.49365921020507814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,fp8,0,0.5209904193878174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,float16,0,0.7812096118927002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,float16,0,0.35533120632171633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,8,128,1,fp8,fp8,0,0.5067264080047608
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,fp8,0,0.3268048048019409
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,float16,0,0.2993232011795044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,fp8,0,0.2631759881973267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,1,128,1,fp8,fp8,0,0.2680655956268311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,float16,0,0.29783360958099364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,fp8,0,0.26208319664001467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,2,128,1,fp8,fp8,0,0.26687519550323485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,float16,0,0.299235200881958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,fp8,0,0.26571199893951414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,4,128,1,fp8,fp8,0,0.26735360622406007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,float16,0,0.30148160457611084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,8,128,1,fp8,fp8,0,0.26584959030151367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,float16,0,0.19442880153656006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,fp8,0,0.17473920583724975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,64,128,1,fp8,fp8,0,0.17427200078964233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,float16,0,0.1630079984664917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,64,128,1,fp8,fp8,0,0.44803681373596194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,fp8,0,0.14750399589538574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,1,128,1,fp8,fp8,0,0.14625600576400757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,float16,0,0.16363199949264526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,fp8,0,0.1464800000190735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,2,128,1,fp8,fp8,0,0.1467136025428772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,fp8,0,0.14736640453338623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,fp8,0,0.2676383972167969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,4,128,1,fp8,fp8,0,0.14715520143508912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,float16,0,0.16962720155715943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,fp8,0,0.14658880233764648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,8,128,1,fp8,fp8,0,0.1481487989425659
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,float16,0,0.164683198928833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,fp8,0,3.8238929748535155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,float16,0,5.189700698852539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,1,128,1,fp8,fp8,0,3.669123077392578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,float16,0,5.896476745605469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,fp8,0,3.7964481353759765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,2,128,1,fp8,fp8,0,3.623611068725586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,float16,0,5.700044631958008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,4,128,1,fp8,fp8,0,3.8579246520996096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,fp8,0,3.859355163574219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,float16,0,5.6731201171875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,fp8,0,3.8266929626464843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,float16,0,3.1972688674926757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,8,128,1,fp8,fp8,0,3.9361759185791017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,fp8,0,2.331470489501953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,64,128,1,fp8,fp8,0,2.31136474609375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,fp8,0,1.9236175537109375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,float16,0,2.6989215850830077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,1,128,1,fp8,fp8,0,1.8426336288452148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,float16,0,1.9267391204833983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,fp8,0,1.8919727325439453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,2,128,1,fp8,fp8,0,2.0192239761352537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,float16,0,2.0077951431274412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,fp8,0,1.8872528076171875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,4,128,1,fp8,fp8,0,2.162980842590332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,float16,0,2.9238943099975585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,8,128,1,fp8,fp8,0,1.8392255783081055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,fp8,0,2.067884826660156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,float16,0,1.5178879737854003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,float16,0,1.0606207847595215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,64,128,1,fp8,fp8,0,1.2398672103881836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,fp8,0,1.5697792053222657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,fp8,0,1.4504048347473144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,float16,0,0.9885120391845703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,1,128,1,fp8,fp8,0,1.1573007583618165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,fp8,0,0.9869808197021485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,2,128,1,fp8,fp8,0,1.1129504203796388
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,fp8,0,0.9717103958129882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,4,128,1,fp8,fp8,0,1.0045984268188477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,float16,0,1.4515199661254883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,float16,0,1.1611696243286134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,fp8,0,0.941915225982666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,8,128,1,fp8,fp8,0,0.9593631744384765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,float16,0,1.011299228668213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,fp8,0,0.6203343868255615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,float16,0,0.544265604019165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,fp8,0,0.5951680183410645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,1,128,1,fp8,fp8,0,0.5383503913879395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,float16,0,0.5448575973510742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,fp8,0,0.4762095928192139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,2,128,1,fp8,fp8,0,0.5386112213134766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,float16,0,0.6667039871215821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,fp8,0,0.47478561401367186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,4,128,1,fp8,fp8,0,0.4803919792175293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,float16,0,0.7541903972625732
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,fp8,0,0.47410879135131834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,8,128,1,fp8,fp8,0,0.4787439823150635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,fp8,0,0.31766719818115235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,64,128,1,fp8,fp8,0,0.3203567981719971
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,float16,0,0.4784704208374023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,float16,0,0.2752831935882568
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,fp8,0,0.25889599323272705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,1,128,1,fp8,fp8,0,0.35589599609375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,float16,0,0.274399995803833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,64,128,1,fp8,fp8,0,0.6158127784729004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,2,128,1,fp8,fp8,0,0.35543999671936033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,float16,0,0.27526719570159913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,4,128,1,fp8,fp8,0,0.35577120780944826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,float16,0,0.28148798942565917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,fp8,0,0.25426878929138186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,8,128,1,fp8,fp8,0,0.2520495891571045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,float16,0,0.19297759532928466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,fp8,0,0.1726912021636963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,64,128,1,fp8,fp8,0,0.17339199781417847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,float16,0,0.1507024049758911
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,fp8,0,0.1375663995742798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,1,128,1,fp8,fp8,0,0.13729920387268066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,float16,0,0.1512287974357605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,fp8,0,0.13830080032348632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,2,128,1,fp8,fp8,0,0.13772640228271485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,float16,0,0.15287040472030639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,fp8,0,0.13791199922561645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,fp8,0,0.25912320613861084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,4,128,1,fp8,fp8,0,0.1381983995437622
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,float16,0,0.16007039546966553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,fp8,0,0.13841279745101928
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,8,128,1,fp8,fp8,0,0.14148160219192504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,float16,0,0.1110975980758667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,fp8,0,0.0989296019077301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,64,128,1,fp8,fp8,0,0.09748160243034362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,float16,0,0.08838719725608826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,fp8,0,0.08027520179748535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,1,128,1,fp8,fp8,0,0.08107360005378723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,float16,0,0.08664479851722717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,fp8,0,0.08063520193099975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,2,128,1,fp8,fp8,0,0.0802191972732544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,float16,0,0.08883360028266907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,fp8,0,0.08031520247459412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,4,128,1,fp8,fp8,0,0.08059359788894653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,float16,0,0.08818879723548889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,fp8,0,0.08065760135650635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,8,128,1,fp8,fp8,0,0.08029279708862305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,fp8,0,0.25708959102630613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,fp8,0,2.230847930908203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,float16,0,2.433679962158203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,1,128,1,fp8,fp8,0,2.258742332458496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,fp8,0,2.2400047302246096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,float16,0,3.04412956237793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,float16,0,2.818916893005371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,fp8,0,2.244883155822754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,4,128,1,fp8,fp8,0,2.591035270690918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,2,128,1,fp8,fp8,0,2.9350303649902343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,fp8,0,2.2549055099487303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,float16,0,3.0857471466064452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,8,128,1,fp8,fp8,0,2.3248239517211915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,fp8,0,1.5272319793701172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,64,128,1,fp8,fp8,0,1.925347137451172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,float16,0,1.2117247581481934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,fp8,0,1.135647964477539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,1,128,1,fp8,fp8,0,1.1582799911499024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,float16,0,1.5773856163024902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,fp8,0,1.1317968368530273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,float16,0,1.7317968368530274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,2,128,1,fp8,fp8,0,1.1436384201049805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,fp8,0,1.129964828491211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,float16,0,1.7645792007446288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,4,128,1,fp8,fp8,0,1.1427824020385742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,float16,0,1.337604808807373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,fp8,0,1.1267919540405273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,float16,0,0.8879167556762695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,8,128,1,fp8,fp8,0,1.4296719551086425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,fp8,0,0.8776191711425781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,float16,0,0.637713623046875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,fp8,0,0.6805647850036621
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,1,128,1,fp8,fp8,0,0.5941535949707031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,float16,0,0.7654831886291504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,fp8,0,0.581937599182129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,2,128,1,fp8,fp8,0,0.587985610961914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,float16,0,0.7835487842559814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,4,128,1,fp8,fp8,0,0.587224006652832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,fp8,0,0.5865119934082031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,8,128,1,fp8,fp8,0,0.5837711811065673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,64,128,1,fp8,fp8,0,0.7719583988189698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,float16,0,0.4527760028839111
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,fp8,0,0.39959039688110354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,64,128,1,fp8,fp8,0,0.40168161392211915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,float16,0,0.3256767988204956
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,fp8,0,0.3075536012649536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,1,128,1,fp8,fp8,0,0.30414400100708006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,fp8,0,0.5808559894561768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,float16,0,0.3307215929031372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,fp8,0,0.3032047986984253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,2,128,1,fp8,fp8,0,0.30421440601348876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,float16,0,0.3341583967208862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,fp8,0,0.30307679176330565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,4,128,1,fp8,fp8,0,0.30290400981903076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,float16,0,0.3424751996994019
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,8,128,1,fp8,fp8,0,0.3021359920501709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,float16,0,0.24036478996276855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,fp8,0,0.21174399852752684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,64,128,1,fp8,fp8,0,0.21477758884429932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,float16,0,0.17556159496307372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,fp8,0,0.16546720266342163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,1,128,1,fp8,fp8,0,0.1631168007850647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,fp8,0,0.16350879669189453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,2,128,1,fp8,fp8,0,0.16306560039520263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,float16,0,0.1777680039405823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,fp8,0,0.16293280124664306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,4,128,1,fp8,fp8,0,0.16299200057983398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,float16,0,0.18080960512161254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,fp8,0,0.1625391960144043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,8,128,1,fp8,fp8,0,0.16270400285720826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,float16,0,0.7601007938385009
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,float16,0,0.1289631962776184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,fp8,0,0.11750240325927734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,64,128,1,fp8,fp8,0,0.11660480499267578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,float16,0,0.09951679706573487
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,fp8,0,0.09022560119628906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,1,128,1,fp8,fp8,0,0.09160000085830688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,float16,0,0.09986240267753602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,fp8,0,0.09113439917564392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,2,128,1,fp8,fp8,0,0.09173439741134644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,float16,0,0.178166401386261
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,float16,0,0.10100159645080567
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,fp8,0,0.09141280055046082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,4,128,1,fp8,fp8,0,0.09167680144309998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,float16,0,0.10370080471038819
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,fp8,0,0.09217919707298279
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,float16,0,0.07653759717941284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,8,128,1,fp8,fp8,0,0.09240480065345764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,fp8,0,0.06968640089035034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,64,128,1,fp8,fp8,0,0.06826720237731934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,float16,0,0.06064479947090149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,fp8,0,0.05593119859695435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,1,128,1,fp8,fp8,0,0.05622400045394897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,float16,0,0.05984799861907959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,fp8,0,0.05650399923324585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,2,128,1,fp8,fp8,0,0.055904000997543335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,float16,0,0.061105602979660036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,fp8,0,0.05593760013580322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,fp8,0,0.30201919078826905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,4,128,1,fp8,fp8,0,0.056734400987625125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,float16,0,0.060399997234344485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,fp8,0,0.055936002731323244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,fp8,0,2.3596479415893556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,float16,0,2.700388717651367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,8,128,1,fp8,fp8,0,0.05619199872016907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,1,128,1,fp8,fp8,0,2.378156852722168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,fp8,0,2.387238311767578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,float16,0,3.0311887741088865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,2,128,1,fp8,fp8,0,2.379596710205078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,float16,0,2.7864048004150392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,fp8,0,2.380406379699707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,4,128,1,fp8,fp8,0,2.3755823135375977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,float16,0,3.002191925048828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,fp8,0,2.4955568313598633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,8,128,1,fp8,fp8,0,2.37634391784668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,float16,0,2.034921646118164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,fp8,0,1.8584800720214845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,float16,0,1.26779203414917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,fp8,0,1.196225643157959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,64,128,1,fp8,fp8,0,1.705308723449707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,1,128,1,fp8,fp8,0,1.2320112228393554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,float16,0,1.268620777130127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,fp8,0,1.1938464164733886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,2,128,1,fp8,fp8,0,1.1967007637023925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,float16,0,1.2942144393920898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,fp8,0,1.2179007530212402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,4,128,1,fp8,fp8,0,1.1927712440490723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,float16,0,1.4346303939819336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,fp8,0,1.3625167846679687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,float16,0,0.9690752029418945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,8,128,1,fp8,fp8,0,1.2023391723632812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,fp8,0,1.0261232376098632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,64,128,1,fp8,fp8,0,0.8787856101989746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,float16,0,0.671062421798706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,fp8,0,0.6225903987884521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,float16,0,0.6529327869415283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,1,128,1,fp8,fp8,0,0.6799039840698242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,fp8,0,0.761572790145874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,2,128,1,fp8,fp8,0,0.6189199924468994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,float16,0,0.6657567977905273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,fp8,0,0.6119472026824951
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,4,128,1,fp8,fp8,0,0.6095983982086182
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,float16,0,0.6804224014282226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,8,128,1,fp8,fp8,0,0.6094655990600586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,fp8,0,0.6472271919250489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,float16,0,0.4997471809387207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,float16,0,0.34145920276641845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,64,128,1,fp8,fp8,0,0.44629440307617185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,fp8,0,0.31705119609832766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,1,128,1,fp8,fp8,0,0.33036160469055176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,float16,0,0.3333823919296265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,fp8,0,0.3165776014328003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,2,128,1,fp8,fp8,0,0.32363998889923096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,float16,0,0.33855040073394777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,fp8,0,0.3151999950408936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,4,128,1,fp8,fp8,0,0.3203632116317749
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,float16,0,0.3507071971893311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,fp8,0,0.3141024112701416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,float16,0,0.2621488094329834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,8,128,1,fp8,fp8,0,0.31863040924072267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,fp8,0,0.23315200805664063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,float16,0,0.1816704034805298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,fp8,0,0.16841440200805663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,1,128,1,fp8,fp8,0,0.17087680101394653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,float16,0,0.1778656005859375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,fp8,0,0.1704767942428589
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,2,128,1,fp8,fp8,0,0.16818879842758178
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,float16,0,0.18358399868011474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,fp8,0,0.1673424005508423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,fp8,0,0.44567041397094725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,4,128,1,fp8,fp8,0,0.17056479454040527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,float16,0,0.18490560054779054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,fp8,0,0.16940959692001342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,8,128,1,fp8,fp8,0,0.1695855975151062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,float16,0,0.13880319595336915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,fp8,0,0.1279263973236084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,64,128,1,fp8,fp8,0,0.12712960243225097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,float16,0,0.09821119904518127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,fp8,0,0.09285280108451843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,1,128,1,fp8,fp8,0,0.09101120233535767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,float16,0,0.1008944034576416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,fp8,0,0.09109920263290405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,2,128,1,fp8,fp8,0,0.09337120056152344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,float16,0,0.09941920042037963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,fp8,0,0.09335359930992126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,4,128,1,fp8,fp8,0,0.09117119908332824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,float16,0,0.10454080104827881
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,fp8,0,0.0913807988166809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,8,128,1,fp8,fp8,0,0.09489759802818298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,float16,0,0.07972480058670044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,64,128,1,fp8,fp8,0,0.24577600955963136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,fp8,0,0.07204480171203613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,64,128,1,fp8,fp8,0,0.07076320052146912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,float16,0,0.05573440194129944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,fp8,0,0.05471519827842712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,1,128,1,fp8,fp8,0,0.05395200252532959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,float16,0,0.05571200251579285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,fp8,0,0.05457280278205871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,float16,0,0.056720000505447385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,fp8,0,0.055448001623153685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,4,128,1,fp8,fp8,0,0.053457599878311154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,float16,0,0.05983039736747742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,fp8,0,0.05356799960136414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,float16,0,0.045105600357055665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,fp8,0,0.04314720034599304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,64,128,1,fp8,fp8,0,0.04329119920730591
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,float16,0,0.0369376003742218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,fp8,0,0.03511039912700653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,1,128,1,fp8,fp8,0,0.035231998562812804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,float16,0,0.03709439933300018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,fp8,0,0.03507040143013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,2,128,1,fp8,fp8,0,0.0350847989320755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,float16,0,0.03731360137462616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,fp8,0,0.03515360057353974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,4,128,1,fp8,fp8,0,0.03511840105056763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,float16,0,0.03761120140552521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,fp8,0,0.03514719903469086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,8,128,1,fp8,fp8,0,0.035129600763320924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,2,128,1,fp8,fp8,0,0.05446239709854126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,8,128,1,fp8,fp8,0,0.05519840121269226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,1,128,1,float16,float16,0,1.9009231567382812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,1,128,1,float16,fp8,0,1.7551471710205078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,1,128,1,fp8,fp8,0,1.767755126953125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,2,128,1,float16,float16,0,1.9022592544555663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,2,128,1,fp8,fp8,0,1.7499919891357423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,2,128,1,float16,fp8,0,1.823841667175293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,4,128,1,float16,float16,0,1.8397167205810547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,4,128,1,float16,fp8,0,1.8140287399291992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,4,128,1,fp8,fp8,0,1.7480287551879883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,8,128,1,float16,float16,0,2.2163999557495115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,8,128,1,fp8,fp8,0,1.7596799850463867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,64,128,1,float16,float16,0,1.5563520431518554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,64,128,1,float16,fp8,0,1.445521640777588
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,64,128,1,fp8,fp8,0,1.3873456001281739
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,1,128,1,float16,float16,0,0.9468671798706054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,1,128,1,float16,fp8,0,0.8926735877990722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,1,128,1,fp8,fp8,0,0.8989439964294433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,2,128,1,float16,float16,0,0.8942111968994141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,2,128,1,float16,fp8,0,0.8899328231811523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,8,128,1,float16,fp8,0,2.2120031356811523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,2,128,1,fp8,fp8,0,0.896940803527832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,4,128,1,float16,float16,0,1.0065999984741212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,4,128,1,float16,fp8,0,0.8872159957885742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,4,128,1,fp8,fp8,0,0.8917455673217773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,8,128,1,float16,float16,0,0.9681856155395507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,8,128,1,fp8,fp8,0,0.8820927619934082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,64,128,1,float16,float16,0,0.7981823921203614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,1,128,1,float16,float16,0,0.47217278480529784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,64,128,1,float16,fp8,0,0.7078527927398681
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,64,128,1,fp8,fp8,0,0.7022624015808105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,1,128,1,float16,fp8,0,0.46281919479370115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,2,128,1,float16,float16,0,0.46109280586242674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,1,128,1,fp8,fp8,0,0.46114239692687986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,2,128,1,float16,fp8,0,0.4618847846984863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,4,128,1,float16,float16,0,0.4776895999908447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,4,128,1,float16,fp8,0,0.4588160037994385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,4,128,1,fp8,fp8,0,0.4573840141296387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,8,128,1,float16,float16,0,0.5004047870635986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,8,128,1,float16,fp8,0,0.9335776329040527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,8,128,1,float16,fp8,0,0.4556431770324707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,8,128,1,fp8,fp8,0,0.45405120849609376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,64,128,1,float16,float16,0,0.4031407833099365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,64,128,1,float16,fp8,0,0.3625296115875244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,64,128,1,fp8,fp8,0,0.364081597328186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,1,128,1,float16,fp8,0,0.23787999153137207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,1,128,1,fp8,fp8,0,0.23973920345306396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,2,128,1,float16,float16,0,0.24346399307250977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,2,128,1,float16,fp8,0,0.23854238986968995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,2,128,1,fp8,fp8,0,0.23842720985412597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,4,128,1,float16,float16,0,0.25093441009521483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,2,128,1,fp8,fp8,0,0.45450558662414553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,4,128,1,float16,fp8,0,0.23922719955444335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,4,128,1,fp8,fp8,0,0.2367647886276245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,8,128,1,float16,float16,0,0.2554208040237427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,8,128,1,float16,fp8,0,0.23712480068206787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,64,128,1,float16,float16,0,0.2144320011138916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,64,128,1,float16,fp8,0,0.19019360542297364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,64,128,1,fp8,fp8,0,0.19079840183258057
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,1,128,1,float16,float16,0,0.13097599744796753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,1,128,1,float16,fp8,0,0.12597600221633912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,1,128,1,float16,float16,0,0.2408751964569092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,1,128,1,fp8,fp8,0,0.12594239711761473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,2,128,1,float16,float16,0,0.1292207956314087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,2,128,1,fp8,fp8,0,0.12481119632720947
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,4,128,1,float16,float16,0,0.13089120388031006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,4,128,1,float16,fp8,0,0.12477920055389405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,4,128,1,fp8,fp8,0,0.12633440494537354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,8,128,1,fp8,fp8,0,0.23615679740905762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,8,128,1,float16,float16,0,0.13651200532913207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,8,128,1,float16,fp8,0,0.12636799812316896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,8,128,1,fp8,fp8,0,0.12483199834823608
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,64,128,1,float16,float16,0,0.11490559577941895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,64,128,1,float16,fp8,0,0.1011072039604187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,64,128,1,fp8,fp8,0,0.10242079496383667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,1,128,1,float16,float16,0,0.07184159755706787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,1,128,1,float16,fp8,0,0.06861600279808044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,1,128,1,fp8,fp8,0,0.06801279783248901
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,2,128,1,float16,float16,0,0.07398399710655212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,2,128,1,float16,fp8,0,0.06801440119743347
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,2,128,1,fp8,fp8,0,0.06894720196723939
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,4,128,1,float16,float16,0,0.07321760058403015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,4,128,1,float16,fp8,0,0.06950719952583313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,4,128,1,fp8,fp8,0,0.06858720183372498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,2,128,1,float16,fp8,0,0.1257423996925354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,8,128,1,float16,float16,0,0.07712000012397766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,8,128,1,float16,fp8,0,0.06902239918708801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,8,128,1,fp8,fp8,0,0.07057600021362305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,64,128,1,float16,float16,0,0.06603519916534424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,64,128,1,float16,fp8,0,0.05744479894638062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,1,128,1,float16,float16,0,0.04159359931945801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,1,128,1,float16,fp8,0,0.03951039910316467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,1,128,1,fp8,fp8,0,0.04078719913959503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,2,128,1,float16,float16,0,0.04141440093517303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,2,128,1,fp8,fp8,0,0.041140800714492796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,4,128,1,float16,float16,0,0.041308799386024476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,4,128,1,float16,fp8,0,0.04073440134525299
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,4,128,1,fp8,fp8,0,0.03981919884681702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,8,128,1,float16,float16,0,0.04346559941768646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,8,128,1,float16,fp8,0,0.0398032009601593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,8,128,1,fp8,fp8,0,0.04081760048866272
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,64,128,1,float16,float16,0,0.03512159883975983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,64,128,1,float16,fp8,0,0.035036799311637876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,64,128,1,fp8,fp8,0,0.03308480083942413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,1,128,1,float16,fp8,0,0.026704001426696777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,1,128,1,fp8,fp8,0,0.026840001344680786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,2,128,1,float16,float16,0,0.02689119875431061
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,2,128,1,float16,fp8,0,0.026979199051856993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,2,128,1,fp8,fp8,0,0.02701759934425354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,4,128,1,float16,float16,0,0.02697120010852814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,4,128,1,float16,fp8,0,0.026907199621200563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,4,128,1,fp8,fp8,0,0.026947200298309326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,8,128,1,float16,float16,0,0.028988799452781676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,8,128,1,float16,fp8,0,0.026924800872802735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,8,128,1,fp8,fp8,0,0.02688319981098175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,64,128,1,fp8,fp8,0,0.05797759890556335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,64,128,1,float16,float16,0,0.020683200657367708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,64,128,1,float16,fp8,0,0.020974400639533996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,64,128,1,fp8,fp8,0,0.020768000185489653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,2,128,1,float16,fp8,0,0.03949759900569916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,1,128,1,float16,float16,0,0.017020800709724428
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,1,128,1,float16,fp8,0,0.016944000124931337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,1,128,1,fp8,fp8,0,0.01690399944782257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,2,128,1,fp8,fp8,0,0.016927999258041383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,4,128,1,float16,float16,0,0.016884799301624297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,4,128,1,float16,fp8,0,0.01685280054807663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,1,128,1,float16,float16,0,0.026953598856925963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,4,128,1,fp8,fp8,0,0.01688639968633652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,8,128,1,float16,fp8,0,0.01693760007619858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,8,128,1,fp8,fp8,0,0.016859200596809388
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,1,128,1,float16,float16,0,0.7172304153442383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,1,128,1,float16,fp8,0,0.725870418548584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,2,128,1,float16,float16,0,0.01791999936103821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,1,128,1,fp8,fp8,0,0.7256959915161133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,8,128,1,float16,float16,0,0.018622399866580965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,2,128,1,float16,float16,0,0.7210991859436036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,2,128,1,float16,fp8,0,0.7235536098480224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,2,128,1,fp8,fp8,0,0.7303599834442138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,4,128,1,float16,fp8,0,0.7209184169769287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,4,128,1,fp8,fp8,0,0.7252799987792968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,8,128,1,float16,float16,0,0.7715888023376465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,2,128,1,float16,fp8,0,0.016715200245380403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,8,128,1,float16,fp8,0,0.7225967884063721
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,8,128,1,fp8,fp8,0,0.7246975898742676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,64,128,1,float16,float16,0,0.6953167915344238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,64,128,1,float16,fp8,0,0.6216752052307128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,64,128,1,fp8,fp8,0,0.6192063808441162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,1,128,1,float16,fp8,0,0.37459840774536135
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,4,128,1,float16,float16,0,0.7419248104095459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,1,128,1,fp8,fp8,0,0.37156000137329104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,2,128,1,float16,float16,0,0.37179200649261473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,2,128,1,float16,fp8,0,0.37342560291290283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,2,128,1,fp8,fp8,0,0.37096641063690183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,4,128,1,float16,float16,0,0.38237919807434084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,4,128,1,float16,fp8,0,0.37265279293060305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,4,128,1,fp8,fp8,0,0.36930079460144044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,8,128,1,float16,float16,0,0.4023791790008545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,8,128,1,float16,fp8,0,0.36972799301147463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,8,128,1,fp8,fp8,0,0.36686880588531495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,64,128,1,float16,fp8,0,0.31644320487976074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,64,128,1,fp8,fp8,0,0.3165855884552002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,1,128,1,float16,float16,0,0.19230079650878906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,1,128,1,float16,float16,0,0.36821761131286623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,1,128,1,float16,fp8,0,0.19277440309524535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,1,128,1,fp8,fp8,0,0.19102079868316652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,2,128,1,float16,float16,0,0.19212160110473633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,2,128,1,float16,fp8,0,0.1911072015762329
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,2,128,1,fp8,fp8,0,0.19139519929885865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,4,128,1,float16,float16,0,0.19605120420455932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,4,128,1,float16,fp8,0,0.19121919870376586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,4,128,1,fp8,fp8,0,0.1911072015762329
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,8,128,1,float16,float16,0,0.20765600204467774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,8,128,1,float16,fp8,0,0.19031519889831544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,8,128,1,fp8,fp8,0,0.19071199893951415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,64,128,1,float16,float16,0,0.18469280004501343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,64,128,1,float16,fp8,0,0.16548160314559937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,1,128,1,float16,float16,0,0.10351200103759765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,1,128,1,float16,fp8,0,0.10103839635848999
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,64,128,1,float16,float16,0,0.3592767953872681
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,1,128,1,fp8,fp8,0,0.10110559463500976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,2,128,1,float16,float16,0,0.10484319925308228
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,2,128,1,float16,fp8,0,0.10093599557876587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,2,128,1,fp8,fp8,0,0.10277600288391113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,4,128,1,float16,float16,0,0.10532480478286743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,4,128,1,float16,fp8,0,0.10285279750823975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,4,128,1,fp8,fp8,0,0.10088640451431274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,8,128,1,float16,float16,0,0.1116752028465271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,8,128,1,float16,fp8,0,0.10180000066757203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,8,128,1,fp8,fp8,0,0.10304160118103027
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,64,128,1,float16,fp8,0,0.08809599876403809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,64,128,1,fp8,fp8,0,0.08820319771766663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,1,128,1,float16,float16,0,0.05760319828987122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,1,128,1,float16,fp8,0,0.054527997970581055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,1,128,1,fp8,fp8,0,0.05434240102767944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,2,128,1,float16,float16,0,0.05794559717178345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,2,128,1,float16,fp8,0,0.05370399951934814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,2,128,1,fp8,fp8,0,0.05386080145835877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,4,128,1,float16,float16,0,0.05802879929542541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,64,128,1,fp8,fp8,0,0.16620320081710815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,4,128,1,float16,fp8,0,0.05480319857597351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,4,128,1,fp8,fp8,0,0.054262399673461914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,8,128,1,float16,float16,0,0.0608847975730896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,8,128,1,float16,fp8,0,0.05558239817619324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,8,128,1,fp8,fp8,0,0.05565440058708191
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,64,128,1,float16,float16,0,0.05924640297889709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,64,128,1,float16,fp8,0,0.05135840177536011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,64,128,1,fp8,fp8,0,0.051019197702407836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,1,128,1,float16,float16,0,0.03490079939365387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,1,128,1,float16,fp8,0,0.032974401116371156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,1,128,1,fp8,fp8,0,0.03461120128631592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,2,128,1,float16,float16,0,0.03330560028553009
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,2,128,1,float16,fp8,0,0.033923199772834776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,2,128,1,fp8,fp8,0,0.03315840065479279
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,4,128,1,float16,float16,0,0.03516800105571747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,4,128,1,float16,fp8,0,0.033214399218559267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,4,128,1,fp8,fp8,0,0.03311040103435516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,64,128,1,float16,float16,0,0.09628639817237854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,8,128,1,float16,fp8,0,0.033206400275230405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,8,128,1,fp8,fp8,0,0.03297759890556336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,64,128,1,float16,float16,0,0.028968000411987306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,64,128,1,float16,fp8,0,0.02892000079154968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,64,128,1,fp8,fp8,0,0.028969600796699524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,1,128,1,float16,fp8,0,0.02083200067281723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,1,128,1,fp8,fp8,0,0.020929600298404693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,2,128,1,float16,float16,0,0.021028800308704375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,2,128,1,float16,fp8,0,0.02091519981622696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,2,128,1,fp8,fp8,0,0.020838400721549986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,4,128,1,float16,float16,0,0.022278399765491487
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,4,128,1,float16,fp8,0,0.020787200331687926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,4,128,1,fp8,fp8,0,0.02069920003414154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,8,128,1,float16,float16,0,0.02276639938354492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,8,128,1,float16,fp8,0,0.02080159932374954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,8,128,1,fp8,fp8,0,0.020803199708461763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,64,128,1,float16,float16,0,0.017459200322628023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,64,128,1,float16,fp8,0,0.01870879977941513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,64,128,1,fp8,fp8,0,0.018671999871730804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,1,128,1,float16,float16,0,0.014655999839305878
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,1,128,1,float16,fp8,0,0.014635199308395385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,1,128,1,fp8,fp8,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,2,128,1,float16,float16,0,0.01462399959564209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,2,128,1,float16,fp8,0,0.014686399698257446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,2,128,1,fp8,fp8,0,0.01462240070104599
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,8,128,1,float16,float16,0,0.0353520005941391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,4,128,1,float16,float16,0,0.014588800072669984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,4,128,1,float16,fp8,0,0.014678399264812469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,4,128,1,fp8,fp8,0,0.01459839940071106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,8,128,1,float16,float16,0,0.014635199308395385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,8,128,1,float16,fp8,0,0.014732800424098969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,8,128,1,fp8,fp8,0,0.014691199362277984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,1,128,1,float16,float16,0,0.020664000511169435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,64,128,1,float16,float16,0,0.016601599752902985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,64,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,1,128,1,float16,float16,0,0.014417600631713868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,1,128,1,float16,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,1,128,1,fp8,fp8,0,0.014502400159835815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,2,128,1,float16,float16,0,0.014500799775123595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,2,128,1,float16,fp8,0,0.014531199634075165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,2,128,1,fp8,fp8,0,0.014059199392795563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,4,128,1,float16,fp8,0,0.014396800100803376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,4,128,1,fp8,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,8,128,1,float16,float16,0,0.014547200500965118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,8,128,1,float16,fp8,0,0.014508800208568573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,8,128,1,fp8,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,1,128,1,float16,float16,0,0.444974422454834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,1,128,1,float16,fp8,0,0.4548448085784912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,1,128,1,fp8,fp8,0,0.4547232151031494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,64,128,1,fp8,fp8,0,0.01658080071210861
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,2,128,1,float16,float16,0,0.44404802322387693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,4,128,1,float16,float16,0,0.01443839967250824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,2,128,1,float16,fp8,0,0.45316481590270996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,2,128,1,fp8,fp8,0,0.4537968158721924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,4,128,1,float16,fp8,0,0.4523295879364014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,4,128,1,float16,float16,0,0.45800957679748533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,4,128,1,fp8,fp8,0,0.4525455951690674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,8,128,1,float16,float16,0,0.4720592021942139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,8,128,1,fp8,fp8,0,0.4512063980102539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,64,128,1,float16,float16,0,0.3890079975128174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,64,128,1,float16,fp8,0,0.35672318935394287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,1,128,1,float16,float16,0,0.23038239479064943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,64,128,1,fp8,fp8,0,0.35633120536804197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,1,128,1,float16,fp8,0,0.23393120765686035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,2,128,1,float16,float16,0,0.2307919979095459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,1,128,1,fp8,fp8,0,0.23434081077575683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,2,128,1,float16,fp8,0,0.23319520950317382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,2,128,1,fp8,fp8,0,0.23346080780029296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,4,128,1,float16,float16,0,0.23706400394439697
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,4,128,1,float16,fp8,0,0.23346879482269287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,4,128,1,fp8,fp8,0,0.23237600326538085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,8,128,1,float16,float16,0,0.24730560779571534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,8,128,1,float16,fp8,0,0.23240959644317627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,8,128,1,fp8,fp8,0,0.2319200038909912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,64,128,1,float16,float16,0,0.20275840759277344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,64,128,1,float16,fp8,0,0.1843503952026367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,64,128,1,fp8,fp8,0,0.18443039655685425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,1,128,1,float16,float16,0,0.12201600074768067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,1,128,1,float16,fp8,0,0.12131520509719848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,1,128,1,fp8,fp8,0,0.1212928056716919
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,2,128,1,float16,float16,0,0.12202880382537842
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,2,128,1,fp8,fp8,0,0.12181279659271241
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,8,128,1,float16,fp8,0,0.45297441482543943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,4,128,1,float16,fp8,0,0.12120319604873657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,4,128,1,fp8,fp8,0,0.1214095950126648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,8,128,1,float16,float16,0,0.12939679622650146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,8,128,1,float16,fp8,0,0.12115679979324341
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,8,128,1,fp8,fp8,0,0.12115520238876343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,64,128,1,float16,float16,0,0.10491679906845093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,64,128,1,float16,fp8,0,0.09722880125045777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,64,128,1,fp8,fp8,0,0.09865760207176208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,1,128,1,float16,float16,0,0.06596959829330444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,1,128,1,float16,fp8,0,0.06584320068359376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,2,128,1,float16,float16,0,0.06789439916610718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,2,128,1,float16,fp8,0,0.0659056007862091
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,2,128,1,fp8,fp8,0,0.06597920060157776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,4,128,1,float16,float16,0,0.06911680102348328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,4,128,1,float16,fp8,0,0.0661087989807129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,4,128,1,fp8,fp8,0,0.06686879992485047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,8,128,1,float16,float16,0,0.070796799659729
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,8,128,1,float16,fp8,0,0.06618239879608154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,8,128,1,fp8,fp8,0,0.06602399945259094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,64,128,1,float16,float16,0,0.05806879997253418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,2,128,1,float16,fp8,0,0.121070396900177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,64,128,1,float16,fp8,0,0.05353599786758423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,4,128,1,float16,float16,0,0.12391040325164795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,64,128,1,fp8,fp8,0,0.053566402196884154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,1,128,1,float16,float16,0,0.03711999952793121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,1,128,1,float16,fp8,0,0.036929601430892946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,1,128,1,fp8,fp8,0,0.03604960143566131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,2,128,1,float16,float16,0,0.03709119856357575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,2,128,1,fp8,fp8,0,0.036520001292228696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,4,128,1,float16,float16,0,0.03712959885597229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,4,128,1,float16,fp8,0,0.03657119870185852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,4,128,1,fp8,fp8,0,0.037067198753356935
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,8,128,1,float16,fp8,0,0.037084800004959104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,8,128,1,fp8,fp8,0,0.037227201461791995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,1,128,1,fp8,fp8,0,0.06574400067329407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,64,128,1,float16,fp8,0,0.03089280128479004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,64,128,1,fp8,fp8,0,0.030913600325584413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,1,128,1,float16,float16,0,0.022716799378395082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,1,128,1,float16,fp8,0,0.022756800055503845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,1,128,1,fp8,fp8,0,0.022705599665641785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,2,128,1,float16,float16,0,0.022793599963188173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,2,128,1,float16,fp8,0,0.022716799378395082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,2,128,1,fp8,fp8,0,0.022759999334812164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,4,128,1,float16,float16,0,0.02285120040178299
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,4,128,1,float16,fp8,0,0.022814400494098663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,4,128,1,fp8,fp8,0,0.022815999388694764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,8,128,1,float16,float16,0,0.024753600358963013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,8,128,1,float16,fp8,0,0.022819200158119203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,8,128,1,fp8,fp8,0,0.022843199968338012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,64,128,1,float16,float16,0,0.01860000044107437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,64,128,1,float16,fp8,0,0.01892320066690445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,64,128,1,fp8,fp8,0,0.018723200261592864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,2,128,1,float16,fp8,0,0.037038400769233704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,1,128,1,float16,float16,0,0.01634240001440048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,1,128,1,float16,fp8,0,0.016553600132465363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,1,128,1,fp8,fp8,0,0.016195200383663177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,8,128,1,float16,float16,0,0.03819519877433777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,2,128,1,float16,fp8,0,0.014611199498176575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,2,128,1,fp8,fp8,0,0.014617599546909332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,4,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,4,128,1,float16,fp8,0,0.014612799882888794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,4,128,1,fp8,fp8,0,0.014742399752140044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,64,128,1,float16,float16,0,0.032979199290275575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,8,128,1,float16,float16,0,0.016551999747753142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,8,128,1,float16,fp8,0,0.017403200268745422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,64,128,1,float16,float16,0,0.014601600170135499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,64,128,1,float16,fp8,0,0.014484800398349762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,64,128,1,fp8,fp8,0,0.014563199877738953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,1,128,1,float16,float16,0,0.012611199915409089
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,1,128,1,float16,fp8,0,0.012651200592517852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,1,128,1,fp8,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,2,128,1,float16,float16,0,0.012417600303888322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,2,128,1,float16,fp8,0,0.012486399710178375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,2,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,4,128,1,float16,float16,0,0.012566399574279786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,4,128,1,float16,fp8,0,0.012598399817943574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,4,128,1,fp8,fp8,0,0.012595200538635254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,8,128,1,float16,float16,0,0.01255040019750595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,2,128,1,float16,float16,0,0.016388800740242005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,8,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,8,128,1,fp8,fp8,0,0.0123648002743721
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,64,128,1,float16,float16,0,0.01263359934091568
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,64,128,1,float16,fp8,0,0.012415999919176102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,1,128,1,float16,float16,0,0.011857599765062333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,1,128,1,float16,fp8,0,0.01242400035262108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,1,128,1,fp8,fp8,0,0.012377600371837615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,8,128,1,fp8,fp8,0,0.01462399959564209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,2,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,2,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,4,128,1,float16,float16,0,0.011164800077676774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,4,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,4,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,8,128,1,float16,float16,0,0.010764800012111664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,8,128,1,float16,fp8,0,0.010657600313425063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,8,128,1,fp8,fp8,0,0.010716799646615982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,1,128,1,float16,float16,0,0.3724720001220703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,1,128,1,float16,fp8,0,0.37028160095214846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,1,128,1,fp8,fp8,0,0.3707823991775513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,64,128,1,fp8,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,2,128,1,float16,float16,0,0.012388800084590913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,2,128,1,float16,fp8,0,0.37326080799102784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,2,128,1,fp8,fp8,0,0.37380480766296387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,4,128,1,float16,float16,0,0.3817375898361206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,4,128,1,fp8,fp8,0,0.37148480415344237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,8,128,1,float16,float16,0,0.39025919437408446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,8,128,1,float16,fp8,0,0.3707119941711426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,8,128,1,fp8,fp8,0,0.37116959095001223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,64,128,1,float16,float16,0,0.2758752107620239
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,64,128,1,float16,fp8,0,0.2536911964416504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,64,128,1,fp8,fp8,0,0.25350399017333985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,4,128,1,float16,fp8,0,0.3735663890838623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,1,128,1,float16,float16,0,0.19651199579238893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,1,128,1,float16,fp8,0,0.1921183943748474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,1,128,1,fp8,fp8,0,0.19164320230484008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,2,128,1,float16,float16,0,0.1953439950942993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,2,128,1,float16,fp8,0,0.1912351965904236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,2,128,1,fp8,fp8,0,0.19207680225372314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,4,128,1,float16,float16,0,0.1953968048095703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,4,128,1,float16,fp8,0,0.19073439836502076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,4,128,1,fp8,fp8,0,0.19208159446716308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,8,128,1,float16,float16,0,0.19999200105667114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,8,128,1,float16,fp8,0,0.19101279973983765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,8,128,1,fp8,fp8,0,0.19137120246887207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,64,128,1,float16,float16,0,0.13839839696884154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,64,128,1,float16,fp8,0,0.13156319856643678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,64,128,1,fp8,fp8,0,0.13179199695587157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,1,128,1,float16,fp8,0,0.10108000040054321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,2,128,1,float16,float16,0,0.3719759941101074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,2,128,1,float16,float16,0,0.10501760244369507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,2,128,1,float16,fp8,0,0.09891200065612793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,2,128,1,fp8,fp8,0,0.10103199481964112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,4,128,1,float16,float16,0,0.10469919443130493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,4,128,1,float16,fp8,0,0.10056159496307374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,4,128,1,fp8,fp8,0,0.10024640560150147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,8,128,1,float16,float16,0,0.10785919427871704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,8,128,1,float16,fp8,0,0.0999504029750824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,8,128,1,fp8,fp8,0,0.10034719705581666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,64,128,1,float16,float16,0,0.07517120242118835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,64,128,1,fp8,fp8,0,0.07003679871559143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,1,128,1,float16,float16,0,0.053990399837493895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,1,128,1,float16,fp8,0,0.053609597682952884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,1,128,1,fp8,fp8,0,0.053692799806594846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,2,128,1,float16,float16,0,0.05391839742660522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,2,128,1,float16,fp8,0,0.05355679988861084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,2,128,1,fp8,fp8,0,0.05367839932441711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,4,128,1,float16,float16,0,0.05563520193099976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,4,128,1,float16,fp8,0,0.053604799509048465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,4,128,1,fp8,fp8,0,0.05359359979629517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,1,128,1,float16,float16,0,0.10306719541549683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,8,128,1,float16,float16,0,0.05761280059814453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,8,128,1,float16,fp8,0,0.053527998924255374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,1,128,1,fp8,fp8,0,0.10038880109786988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,8,128,1,fp8,fp8,0,0.05374720096588135
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,64,128,1,float16,float16,0,0.0403328001499176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,64,128,1,float16,fp8,0,0.03908160030841827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,64,128,1,fp8,fp8,0,0.039155200123786926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,1,128,1,float16,fp8,0,0.031038400530815125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,1,128,1,fp8,fp8,0,0.03100000023841858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,2,128,1,float16,float16,0,0.03123359978199005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,2,128,1,float16,fp8,0,0.031043198704719544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,2,128,1,fp8,fp8,0,0.031017601490020752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,4,128,1,float16,float16,0,0.03145439922809601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,4,128,1,float16,fp8,0,0.030956798791885377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,64,128,1,float16,fp8,0,0.07135199904441833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,4,128,1,fp8,fp8,0,0.030983999371528625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,8,128,1,float16,float16,0,0.03301439881324768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,8,128,1,fp8,fp8,0,0.031095999479293823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,64,128,1,float16,float16,0,0.023982399702072145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,64,128,1,float16,fp8,0,0.024926400184631346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,64,128,1,fp8,fp8,0,0.024799999594688416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,1,128,1,float16,float16,0,0.020956799387931824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,1,128,1,float16,fp8,0,0.020880000293254854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,1,128,1,fp8,fp8,0,0.0210207998752594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,2,128,1,float16,float16,0,0.020857599377632142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,1,128,1,float16,float16,0,0.03155519962310791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,2,128,1,float16,fp8,0,0.020902399718761445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,2,128,1,fp8,fp8,0,0.020769600570201874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,4,128,1,float16,float16,0,0.02093279957771301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,4,128,1,float16,fp8,0,0.020761600136756896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,8,128,1,float16,float16,0,0.021185599267482758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,8,128,1,float16,fp8,0,0.02075359970331192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,8,128,1,fp8,fp8,0,0.02073120027780533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,64,128,1,float16,float16,0,0.016667200624942778
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,64,128,1,float16,fp8,0,0.016542400419712066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,8,128,1,float16,fp8,0,0.031040000915527343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,1,128,1,float16,float16,0,0.014670400321483612
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,1,128,1,float16,fp8,0,0.014723199605941772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,1,128,1,fp8,fp8,0,0.014604799449443817
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,2,128,1,float16,float16,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,2,128,1,float16,fp8,0,0.014950400590896607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,4,128,1,float16,float16,0,0.014694400131702423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,4,128,1,float16,fp8,0,0.014510400593280792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,4,128,1,fp8,fp8,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,8,128,1,float16,float16,0,0.014601600170135499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,8,128,1,float16,fp8,0,0.014665600657463074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,8,128,1,fp8,fp8,0,0.014604799449443817
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,64,128,1,float16,float16,0,0.02470400035381317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,64,128,1,float16,fp8,0,0.012588800489902496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,4,128,1,fp8,fp8,0,0.0209184005856514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,1,128,1,float16,float16,0,0.011073599755764007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,1,128,1,float16,fp8,0,0.010679999738931656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,1,128,1,fp8,fp8,0,0.010673599690198899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,2,128,1,float16,float16,0,0.01077279970049858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,64,128,1,fp8,fp8,0,0.0165120005607605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,2,128,1,float16,fp8,0,0.010667199641466141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,2,128,1,fp8,fp8,0,0.010676799714565277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,4,128,1,float16,float16,0,0.010811199992895126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,4,128,1,float16,fp8,0,0.01093600019812584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,4,128,1,fp8,fp8,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,8,128,1,float16,float16,0,0.010923200100660325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,8,128,1,float16,fp8,0,0.010755199939012527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,8,128,1,fp8,fp8,0,0.010838399827480315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,64,128,1,float16,float16,0,0.012368000298738479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,64,128,1,float16,fp8,0,0.010688000172376633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,64,128,1,fp8,fp8,0,0.01207199990749359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,1,128,1,float16,fp8,0,0.01072160005569458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,2,128,1,fp8,fp8,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,1,128,1,fp8,fp8,0,0.01072160005569458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,2,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,2,128,1,float16,fp8,0,0.010844799876213073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,2,128,1,fp8,fp8,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,4,128,1,float16,fp8,0,0.010660800337791442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,4,128,1,fp8,fp8,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,8,128,1,float16,float16,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,8,128,1,float16,fp8,0,0.01063840016722679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,8,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,64,128,1,fp8,fp8,0,0.012518399953842163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,1,128,1,float16,float16,0,0.3311728000640869
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,1,128,1,float16,fp8,0,0.33038721084594724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,1,128,1,fp8,fp8,0,0.3268847942352295
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,2,128,1,float16,float16,0,0.33474719524383545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,2,128,1,float16,fp8,0,0.32654879093170164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,2,128,1,fp8,fp8,0,0.3301215887069702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,1,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,4,128,1,float16,fp8,0,0.33016319274902345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,4,128,1,fp8,fp8,0,0.3265167951583862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,8,128,1,float16,float16,0,0.3411776065826416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,4,128,1,float16,float16,0,0.010598400235176086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,8,128,1,float16,fp8,0,0.3262079954147339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,8,128,1,fp8,fp8,0,0.3301151990890503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,64,128,1,float16,fp8,0,0.1989743947982788
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,64,128,1,fp8,fp8,0,0.20116639137268066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,1,128,1,float16,float16,0,0.1722208023071289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,1,128,1,float16,fp8,0,0.17027039527893068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,1,128,1,fp8,fp8,0,0.16817280054092407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,2,128,1,float16,float16,0,0.17468160390853882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,2,128,1,float16,fp8,0,0.16832319498062134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,2,128,1,fp8,fp8,0,0.17027360200881958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,4,128,1,float16,float16,0,0.17309759855270385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,4,128,1,float16,fp8,0,0.17038400173187257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,4,128,1,fp8,fp8,0,0.1684559941291809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,8,128,1,float16,float16,0,0.178275203704834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,8,128,1,float16,fp8,0,0.16826879978179932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,64,128,1,float16,float16,0,0.11232960224151611
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,4,128,1,float16,float16,0,0.33316960334777834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,64,128,1,float16,fp8,0,0.10485440492630005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,64,128,1,fp8,fp8,0,0.10491199493408203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,1,128,1,float16,float16,0,0.0907696008682251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,1,128,1,float16,fp8,0,0.08849440217018127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,1,128,1,fp8,fp8,0,0.08852480053901672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,2,128,1,float16,float16,0,0.09069120287895202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,64,128,1,float16,float16,0,0.21294560432434081
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,2,128,1,float16,fp8,0,0.08862559795379639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,4,128,1,float16,float16,0,0.0911296010017395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,4,128,1,float16,fp8,0,0.08826239705085755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,4,128,1,fp8,fp8,0,0.08857759833335876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,8,128,1,float16,float16,0,0.09295840263366699
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,8,128,1,float16,fp8,0,0.0882207989692688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,8,128,1,fp8,fp8,0,0.17045120000839234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,8,128,1,fp8,fp8,0,0.08853440284729004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,64,128,1,float16,fp8,0,0.055731201171875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,64,128,1,fp8,fp8,0,0.055720001459121704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,1,128,1,float16,float16,0,0.049958398938179015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,1,128,1,float16,fp8,0,0.04840640127658844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,2,128,1,fp8,fp8,0,0.08831040263175964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,1,128,1,fp8,fp8,0,0.04836480021476745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,2,128,1,float16,float16,0,0.05106880068778992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,2,128,1,float16,fp8,0,0.047654399275779726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,2,128,1,fp8,fp8,0,0.04952319860458374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,4,128,1,float16,float16,0,0.049772799015045166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,4,128,1,float16,fp8,0,0.04954079985618591
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,4,128,1,fp8,fp8,0,0.04775039851665497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,8,128,1,float16,float16,0,0.05183519721031189
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,8,128,1,float16,fp8,0,0.047572800517082216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,8,128,1,fp8,fp8,0,0.049700799584388736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,64,128,1,float16,float16,0,0.03306719958782196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,64,128,1,float16,fp8,0,0.033073601126670835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,64,128,1,fp8,fp8,0,0.03298240005970001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,1,128,1,float16,float16,0,0.029552000761032104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,1,128,1,float16,fp8,0,0.028972798585891725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,1,128,1,fp8,fp8,0,0.028966400027275085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,2,128,1,float16,fp8,0,0.029121598601341246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,64,128,1,float16,float16,0,0.05975199937820434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,4,128,1,float16,float16,0,0.03038240075111389
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,4,128,1,float16,fp8,0,0.029009601473808287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,8,128,1,float16,float16,0,0.0304639995098114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,8,128,1,float16,fp8,0,0.028854399919509888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,8,128,1,fp8,fp8,0,0.028790399432182312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,64,128,1,float16,float16,0,0.020948800444602966
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,64,128,1,float16,fp8,0,0.0208079993724823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,64,128,1,fp8,fp8,0,0.020857599377632142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,1,128,1,float16,float16,0,0.02000479996204376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,1,128,1,float16,fp8,0,0.018848000466823576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,1,128,1,fp8,fp8,0,0.018777599930763243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,2,128,1,float16,float16,0,0.020641599595546723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,2,128,1,float16,fp8,0,0.0190080001950264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,2,128,1,fp8,fp8,0,0.01886720061302185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,4,128,1,float16,float16,0,0.02067359983921051
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,4,128,1,float16,fp8,0,0.018811200559139252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,4,128,1,fp8,fp8,0,0.01879359930753708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,8,128,1,float16,float16,0,0.02054399996995926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,8,128,1,float16,fp8,0,0.018756799399852753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,8,128,1,fp8,fp8,0,0.01883520036935806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,64,128,1,float16,float16,0,0.015719999372959138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,2,128,1,float16,float16,0,0.029716798663139345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,2,128,1,fp8,fp8,0,0.028961598873138428
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,64,128,1,fp8,fp8,0,0.014724799990653991
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,1,128,1,float16,float16,0,0.014614400267601014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,1,128,1,float16,fp8,0,0.012595200538635254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,4,128,1,fp8,fp8,0,0.02892799973487854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,1,128,1,fp8,fp8,0,0.014575999975204468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,2,128,1,float16,float16,0,0.013478399813175201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,2,128,1,float16,fp8,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,2,128,1,fp8,fp8,0,0.014572800695896148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,4,128,1,float16,float16,0,0.014182400703430176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,4,128,1,float16,fp8,0,0.014510400593280792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,4,128,1,fp8,fp8,0,0.014646400511264802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,8,128,1,float16,float16,0,0.014547200500965118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,8,128,1,float16,fp8,0,0.014535999298095703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,8,128,1,fp8,fp8,0,0.012644800543785095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,64,128,1,float16,float16,0,0.012577599287033081
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,64,128,1,fp8,fp8,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,64,128,1,float16,fp8,0,0.010585600137710571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,1,128,1,float16,fp8,0,0.010688000172376633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,1,128,1,fp8,fp8,0,0.01063840016722679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,2,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,2,128,1,float16,fp8,0,0.010735999792814255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,2,128,1,fp8,fp8,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,4,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,4,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,8,128,1,float16,float16,0,0.01063840016722679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,8,128,1,float16,fp8,0,0.010593599826097488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,8,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,64,128,1,float16,float16,0,0.012582400441169738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,64,128,1,float16,fp8,0,0.010635200142860412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,64,128,1,fp8,fp8,0,0.010599999874830245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,64,128,1,float16,fp8,0,0.014633600413799287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,1,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,1,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,2,128,1,float16,float16,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,2,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,2,128,1,fp8,fp8,0,0.010620799660682679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,4,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,4,128,1,float16,fp8,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,4,128,1,fp8,fp8,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,1,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,8,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,8,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,8,128,1,fp8,fp8,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,4,128,1,float16,float16,0,0.010644800215959548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,1,128,1,float16,float16,0,0.32154719829559325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,1,128,1,float16,fp8,0,0.3096127986907959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,2,128,1,float16,float16,0,0.32361760139465334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,2,128,1,float16,fp8,0,0.3092351913452148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,1,128,1,float16,float16,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,2,128,1,fp8,fp8,0,0.3095072031021118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,4,128,1,float16,fp8,0,0.30914719104766847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,4,128,1,float16,float16,0,0.32275679111480715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,4,128,1,fp8,fp8,0,0.3086767911911011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,8,128,1,float16,float16,0,0.32484960556030273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,8,128,1,float16,fp8,0,0.3088479995727539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,8,128,1,fp8,fp8,0,0.3069583892822266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,64,128,1,float16,fp8,0,0.1745360016822815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,64,128,1,fp8,fp8,0,0.17474240064620972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,1,128,1,fp8,fp8,0,0.30810720920562745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,1,128,1,float16,float16,0,0.165830397605896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,1,128,1,float16,fp8,0,0.1580512046813965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,1,128,1,fp8,fp8,0,0.15776480436325074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,2,128,1,float16,float16,0,0.166539204120636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,2,128,1,float16,fp8,0,0.15747519731521606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,2,128,1,fp8,fp8,0,0.1571328043937683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,4,128,1,float16,float16,0,0.1674496054649353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,4,128,1,float16,fp8,0,0.15781919956207274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,4,128,1,fp8,fp8,0,0.15792640447616577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,8,128,1,float16,fp8,0,0.15689599514007568
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,8,128,1,fp8,fp8,0,0.157096004486084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,64,128,1,float16,float16,0,0.09827679991722107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,64,128,1,float16,fp8,0,0.09058079719543458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,64,128,1,float16,float16,0,0.1880336046218872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,1,128,1,float16,float16,0,0.08836479783058167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,1,128,1,float16,fp8,0,0.08292160034179688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,1,128,1,fp8,fp8,0,0.08243680000305176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,2,128,1,float16,float16,0,0.08913279771804809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,2,128,1,float16,fp8,0,0.08238720297813415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,2,128,1,fp8,fp8,0,0.08285120129585266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,4,128,1,float16,float16,0,0.08877279758453369
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,4,128,1,float16,fp8,0,0.08274880051612854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,4,128,1,fp8,fp8,0,0.08230239748954774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,8,128,1,float16,float16,0,0.09089440107345581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,8,128,1,float16,fp8,0,0.08244320154190063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,8,128,1,fp8,fp8,0,0.08295999765396118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,64,128,1,float16,fp8,0,0.05154079794883728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,64,128,1,fp8,fp8,0,0.05146719813346863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,1,128,1,float16,float16,0,0.049646401405334474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,1,128,1,float16,fp8,0,0.04566720128059387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,8,128,1,float16,float16,0,0.16958240270614625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,1,128,1,fp8,fp8,0,0.045921599864959715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,2,128,1,float16,float16,0,0.04942240118980408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,2,128,1,float16,fp8,0,0.04725759923458099
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,2,128,1,fp8,fp8,0,0.047363200783729555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,4,128,1,float16,fp8,0,0.047353601455688475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,64,128,1,fp8,fp8,0,0.09041119813919067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,4,128,1,fp8,fp8,0,0.047249600291252136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,8,128,1,float16,float16,0,0.04954079985618591
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,8,128,1,float16,fp8,0,0.047336000204086306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,8,128,1,fp8,fp8,0,0.045444801449775696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,64,128,1,float16,float16,0,0.031046399474143983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,64,128,1,float16,fp8,0,0.02895680069923401
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,64,128,1,fp8,fp8,0,0.02906239926815033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,1,128,1,float16,float16,0,0.029065600037574767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,1,128,1,fp8,fp8,0,0.026956799626350402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,2,128,1,float16,float16,0,0.02894560098648071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,2,128,1,float16,fp8,0,0.02686080038547516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,2,128,1,fp8,fp8,0,0.026940798759460448
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,64,128,1,float16,float16,0,0.05170720219612122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,4,128,1,float16,float16,0,0.02890239953994751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,4,128,1,float16,fp8,0,0.026943999528884887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,4,128,1,fp8,fp8,0,0.026952001452445983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,8,128,1,float16,float16,0,0.029105600714683533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,8,128,1,float16,fp8,0,0.026900801062583923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,8,128,1,fp8,fp8,0,0.02698720097541809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,64,128,1,float16,float16,0,0.020751999318599702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,64,128,1,float16,fp8,0,0.019766399264335634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,64,128,1,fp8,fp8,0,0.01881600022315979
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,1,128,1,float16,float16,0,0.018695999681949616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,1,128,1,float16,fp8,0,0.01879040002822876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,1,128,1,fp8,fp8,0,0.018694399297237395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,2,128,1,float16,float16,0,0.018884800374507904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,2,128,1,fp8,fp8,0,0.018782399594783783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,2,128,1,float16,fp8,0,0.018723200261592864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,4,128,1,float16,float16,0,0.04988960027694702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,4,128,1,float16,float16,0,0.018964800238609313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,4,128,1,float16,fp8,0,0.018768000602722167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,4,128,1,fp8,fp8,0,0.018836799263954162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,8,128,1,float16,float16,0,0.018833599984645844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,8,128,1,float16,fp8,0,0.018803200125694274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,8,128,1,fp8,fp8,0,0.018913599848747253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,64,128,1,float16,float16,0,0.014668799936771393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,64,128,1,float16,fp8,0,0.014630399644374847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,64,128,1,fp8,fp8,0,0.014593599736690522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,1,128,1,float16,fp8,0,0.012614400684833526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,1,128,1,fp8,fp8,0,0.014718399941921234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,1,128,1,float16,fp8,0,0.026958400011062623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,2,128,1,float16,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,2,128,1,fp8,fp8,0,0.014481599628925323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,4,128,1,float16,float16,0,0.01464959979057312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,4,128,1,float16,fp8,0,0.014451199769973755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,4,128,1,fp8,fp8,0,0.014665600657463074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,8,128,1,float16,float16,0,0.01464959979057312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,8,128,1,float16,fp8,0,0.014763200283050537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,8,128,1,fp8,fp8,0,0.014683200418949128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,64,128,1,float16,float16,0,0.012827199697494508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,64,128,1,float16,fp8,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,64,128,1,fp8,fp8,0,0.010766399651765823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,1,128,1,float16,float16,0,0.010691200196743012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,1,128,1,float16,fp8,0,0.010787200182676315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,1,128,1,fp8,fp8,0,0.010627199709415436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,2,128,1,float16,float16,0,0.010705599933862687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,1,128,1,float16,float16,0,0.014699199795722961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,2,128,1,float16,fp8,0,0.010603199899196624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,2,128,1,fp8,fp8,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,4,128,1,float16,float16,0,0.010737600177526474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,4,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,4,128,1,fp8,fp8,0,0.010718400031328202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,8,128,1,float16,float16,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,8,128,1,float16,fp8,0,0.010684800148010255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,8,128,1,fp8,fp8,0,0.010567999631166457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,64,128,1,float16,fp8,0,0.010711999982595444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,64,128,1,fp8,fp8,0,0.01061599999666214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,1,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,1,128,1,float16,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,1,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,2,128,1,float16,float16,0,0.010676799714565277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,2,128,1,float16,fp8,0,0.010639999806880952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,2,128,1,fp8,fp8,0,0.010692799836397171
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,4,128,1,float16,float16,0,0.010673599690198899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,4,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,4,128,1,fp8,fp8,0,0.010635200142860412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,8,128,1,float16,float16,0,0.010595200210809707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,8,128,1,float16,fp8,0,0.010599999874830245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,8,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,2,128,1,float16,float16,0,0.012639999389648438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,1,128,1,float16,float16,0,0.31761438846588136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,1,128,1,fp8,fp8,0,0.2957360029220581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,64,128,1,float16,float16,0,0.012692800164222718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,2,128,1,float16,float16,0,0.3176944017410278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,2,128,1,float16,fp8,0,0.2953295946121216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,0,0.3162816047668457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,0,0.2952768087387085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,4,128,1,fp8,fp8,0,0.2955280065536499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,0,0.31776158809661864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,8,128,1,fp8,fp8,0,0.2952687978744507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,0,0.1657472014427185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,0,0.15394400358200072
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,64,128,1,fp8,fp8,0,0.15387519598007202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,1,128,1,float16,float16,0,0.16393599510192872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,1,128,1,float16,fp8,0,0.15205119848251342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,1,128,1,float16,fp8,0,0.2976560115814209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,1,128,1,fp8,fp8,0,0.15215200185775757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,2,128,1,float16,float16,0,0.16487679481506348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,2,128,1,float16,fp8,0,0.1528831958770752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,2,128,1,fp8,fp8,0,0.1539199948310852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,0,0.16430720090866088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,0,0.153985595703125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,2,128,1,fp8,fp8,0,0.2973423957824707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,4,128,1,fp8,fp8,0,0.152729594707489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,0,0.15401120185852052
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,8,128,1,fp8,fp8,0,0.1532896041870117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,0,0.08981599807739257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,0,0.08218240141868591
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,64,128,1,fp8,fp8,0,0.08236640095710754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,0,0.29541280269622805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,1,128,1,float16,float16,0,0.08853920102119446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,1,128,1,float16,fp8,0,0.08211680054664612
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,1,128,1,fp8,fp8,0,0.08220160007476807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,2,128,1,float16,float16,0,0.08764479756355285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,2,128,1,float16,fp8,0,0.08231359720230103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,0,0.08775519728660583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,0,0.08234879970550538
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,4,128,1,fp8,fp8,0,0.0808896005153656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,0,0.088510400056839
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,0,0.08114399909973144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,8,128,1,fp8,fp8,0,0.0822655975818634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,0,0.049579200148582456
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,0,0.04558559954166412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,64,128,1,fp8,fp8,0,0.04543839991092682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,1,128,1,float16,float16,0,0.0495855987071991
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,1,128,1,float16,fp8,0,0.04532159864902496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,1,128,1,fp8,fp8,0,0.045414400100708005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,2,128,1,float16,fp8,0,0.04561119973659515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,2,128,1,float16,float16,0,0.04945760071277618
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,2,128,1,fp8,fp8,0,0.04540640115737915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,0,0.045449599623680115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,0,0.16448320150375367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,0,0.04944320023059845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,0,0.04532000124454498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,8,128,1,fp8,fp8,0,0.04532960057258606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,0,0.03102880120277405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,0,0.026910400390625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,64,128,1,fp8,fp8,0,0.02686559855937958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,2,128,1,fp8,fp8,0,0.08227840065956116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,1,128,1,float16,float16,0,0.029014399647712706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,1,128,1,float16,fp8,0,0.026873600482940675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,1,128,1,fp8,fp8,0,0.02696000039577484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,2,128,1,float16,float16,0,0.02898240089416504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,2,128,1,float16,fp8,0,0.026953598856925963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,2,128,1,fp8,fp8,0,0.026953598856925963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,0,0.02892639935016632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,0,0.02691679894924164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,4,128,1,fp8,fp8,0,0.026924800872802735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,0,0.02908639907836914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,0,0.026923200488090514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,8,128,1,fp8,fp8,0,0.027009600400924684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,0,0.020857599377632142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,0,0.04941279888153076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,0,0.018768000602722167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,64,128,1,fp8,fp8,0,0.018811200559139252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,1,128,1,float16,float16,0,0.018803200125694274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,4,128,1,fp8,fp8,0,0.045454400777816775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,1,128,1,fp8,fp8,0,0.018676799535751343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,2,128,1,float16,float16,0,0.019009600579738616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,2,128,1,float16,fp8,0,0.018700799345970152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,2,128,1,fp8,fp8,0,0.018729600310325622
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,0,0.018824000656604768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,0,0.018668800592422485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,4,128,1,fp8,fp8,0,0.018676799535751343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,0,0.01892800033092499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,8,128,1,fp8,fp8,0,0.01881439983844757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,0,0.01653439998626709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,0,0.014670400321483612
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,64,128,1,fp8,fp8,0,0.0147024005651474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,1,128,1,float16,float16,0,0.014547200500965118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,1,128,1,float16,fp8,0,0.014070400595664978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,1,128,1,fp8,fp8,0,0.013779200613498688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,2,128,1,float16,float16,0,0.015014399588108063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,2,128,1,float16,fp8,0,0.0147024005651474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,2,128,1,fp8,fp8,0,0.012998400628566742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,1,128,1,float16,fp8,0,0.018768000602722167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,0,0.014711999893188476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,0,0.012681600451469422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,4,128,1,fp8,fp8,0,0.01276479959487915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,0,0.014633600413799287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,8,128,1,fp8,fp8,0,0.012694400548934937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,0,0.012811200320720672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,0,0.012601600587368011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,0,0.018705600500106813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,0,0.010807999968528747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,64,128,1,fp8,fp8,0,0.010761599987745285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,1,128,1,float16,float16,0,0.01063840016722679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,1,128,1,float16,fp8,0,0.010688000172376633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,1,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,2,128,1,float16,fp8,0,0.01067200005054474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,0,0.0106175996363163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,4,128,1,fp8,fp8,0,0.01061440035700798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,0,0.010756800323724747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,8,128,1,fp8,fp8,0,0.010688000172376633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,0,0.012540799379348756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,64,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,1,128,1,float16,float16,0,0.010779199749231338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,1,128,1,float16,fp8,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,1,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,2,128,1,float16,float16,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,2,128,1,float16,fp8,0,0.010564800351858139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,2,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,0,0.010595200210809707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,4,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,8,128,1,fp8,fp8,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,2,128,1,float16,float16,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,2,128,1,fp8,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,fp8,0,16.894854736328124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,1,128,1,fp8,fp8,0,16.955282592773436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,float16,0,31.1791259765625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,float16,0,30.485577392578126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,fp8,0,18.660336303710938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,2,128,1,fp8,fp8,0,17.92534942626953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,fp8,0,19.17832794189453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,4,128,1,fp8,fp8,0,18.079254150390625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,float16,0,32.02896423339844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,float16,0,29.710498046875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,fp8,0,20.06913604736328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,8,128,1,fp8,fp8,0,18.58037414550781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,fp8,0,8.793721771240234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,48,128,1,fp8,fp8,0,10.196749114990235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,float16,0,15.286746215820312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,fp8,0,8.391654205322265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,1,128,1,fp8,fp8,0,9.804847717285156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,fp8,0,9.057331085205078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,2,128,1,fp8,fp8,0,9.909865570068359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,float16,0,14.752813720703125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,fp8,0,9.92856674194336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,4,128,1,fp8,fp8,0,8.987655639648438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,float16,0,15.100309753417969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,fp8,0,9.227574157714844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,8,128,1,fp8,fp8,0,8.518937683105468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,float16,0,16.389544677734374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,fp8,0,4.3944145202636715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,48,128,1,fp8,fp8,0,4.677555084228516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,float16,0,6.661790466308593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,fp8,0,4.644795227050781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,1,128,1,fp8,fp8,0,4.503939056396485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,float16,0,7.379672241210938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,fp8,0,4.549924850463867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,2,128,1,fp8,fp8,0,4.383500671386718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,fp8,0,4.183342361450196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,float16,0,6.771463775634766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,4,128,1,fp8,fp8,0,4.241438293457032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,float16,0,7.1610595703125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,fp8,0,4.324875259399414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,8,128,1,fp8,fp8,0,4.403591918945312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,fp8,0,2.3733343124389648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,48,128,1,fp8,fp8,0,2.2461215972900392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,float16,0,3.0427776336669923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,fp8,0,2.273414421081543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,1,128,1,fp8,fp8,0,2.09628963470459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,float16,0,2.5275503158569337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,fp8,0,2.4714527130126953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,2,128,1,fp8,fp8,0,2.074281692504883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,float16,0,2.638177680969238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,fp8,0,2.1937280654907227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,4,128,1,fp8,fp8,0,2.071816062927246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,float16,0,2.8042671203613283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,fp8,0,2.3208656311035156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,8,128,1,fp8,fp8,0,2.1876527786254885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,float16,0,17.018531799316406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,fp8,0,10.1790771484375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,1,128,1,fp8,fp8,0,10.074739074707031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,float16,0,3.0449615478515626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,float16,0,6.7208000183105465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,float16,0,17.64635772705078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,float16,0,14.617759704589844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,fp8,0,10.414723205566407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,2,128,1,fp8,fp8,0,9.621883392333984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,fp8,0,11.01537628173828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,4,128,1,fp8,fp8,0,10.925536346435546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,float16,0,16.713661193847656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,float16,0,17.43816375732422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,fp8,0,11.201441955566406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,8,128,1,fp8,fp8,0,11.588215637207032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,float16,0,8.889435577392579
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,fp8,0,5.362496185302734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,48,128,1,fp8,fp8,0,5.7975822448730465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,fp8,0,4.818252944946289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,1,128,1,fp8,fp8,0,4.772323226928711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,float16,0,9.100955200195312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,fp8,0,5.006166458129883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,2,128,1,fp8,fp8,0,4.7753345489501955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,float16,0,9.228968048095703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,fp8,0,4.922715377807617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,4,128,1,fp8,fp8,0,4.977068710327148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,float16,0,9.120894622802734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,fp8,0,5.01426887512207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,float16,0,4.777371215820312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,float16,0,9.380241394042969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,8,128,1,fp8,fp8,0,5.213457489013672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,fp8,0,2.7617919921875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,fp8,0,2.671459197998047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,48,128,1,fp8,fp8,0,2.718646430969238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,float16,0,4.437665557861328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,1,128,1,fp8,fp8,0,2.618948745727539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,fp8,0,2.521174430847168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,float16,0,3.840598297119141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,2,128,1,fp8,fp8,0,2.7955392837524413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,float16,0,3.3233310699462892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,fp8,0,2.508910369873047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,4,128,1,fp8,fp8,0,2.678280067443848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,float16,0,2.779422378540039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,8,128,1,fp8,fp8,0,2.653003120422363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,float16,0,1.560859203338623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,48,128,1,fp8,fp8,0,1.3498240470886231
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,fp8,0,1.8408943176269532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,float16,0,2.16910400390625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,fp8,0,1.2430735588073731
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,1,128,1,fp8,fp8,0,1.2512127876281738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,fp8,0,2.607441520690918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,fp8,0,1.284641647338867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,2,128,1,fp8,fp8,0,1.2289615631103517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,float16,0,1.7834447860717773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,float16,0,1.3906496047973633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,fp8,0,1.284817600250244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,4,128,1,fp8,fp8,0,1.350107192993164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,float16,0,1.7056224822998047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,fp8,0,1.4247936248779296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,8,128,1,fp8,fp8,0,1.3095744132995606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,1,128,1,fp8,fp8,0,6.900682830810547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,fp8,0,7.086702728271485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,float16,0,13.317892456054688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,float16,0,13.194573974609375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,fp8,0,6.903473663330078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,2,128,1,fp8,fp8,0,7.081797027587891
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,fp8,0,7.9506782531738285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,4,128,1,fp8,fp8,0,7.134425354003906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,float16,0,13.161933898925781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,float16,0,12.46988296508789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,fp8,0,7.916574096679687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,fp8,0,3.745529556274414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,float16,0,6.48935546875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,8,128,1,fp8,fp8,0,7.639366149902344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,48,128,1,fp8,fp8,0,4.051607894897461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,fp8,0,3.5808528900146483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,float16,0,5.76800308227539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,1,128,1,fp8,fp8,0,3.319416046142578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,fp8,0,3.6722110748291015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,float16,0,6.503355407714844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,2,128,1,fp8,fp8,0,3.536376190185547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,float16,0,3.790662384033203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,4,128,1,fp8,fp8,0,3.4080879211425783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,fp8,0,3.946428680419922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,float16,0,2.8226207733154296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,8,128,1,fp8,fp8,0,3.5818958282470703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,float16,0,4.921161651611328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,fp8,0,4.238942337036133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,fp8,0,2.1495328903198243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,48,128,1,fp8,fp8,0,1.8798959732055665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,fp8,0,1.937272071838379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,float16,0,2.670483207702637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,1,128,1,fp8,fp8,0,1.8828559875488282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,float16,0,2.4936208724975586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,fp8,0,1.8431184768676758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,2,128,1,fp8,fp8,0,2.066598320007324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,float16,0,2.6366144180297852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,4,128,1,fp8,fp8,0,1.8121599197387694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,fp8,0,2.027801513671875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,float16,0,2.1911151885986326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,float16,0,1.0914992332458495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,8,128,1,fp8,fp8,0,1.696708869934082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,fp8,0,2.077403259277344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,48,128,1,fp8,fp8,0,1.1254096031188965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,fp8,0,1.4476592063903808
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,fp8,0,1.0178655624389648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,1,128,1,fp8,fp8,0,0.8895279884338378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,float16,0,1.0145232200622558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,fp8,0,0.8960703849792481
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,2,128,1,fp8,fp8,0,0.871735954284668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,float16,0,1.027235221862793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,fp8,0,0.8805104255676269
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,4,128,1,fp8,fp8,0,0.8719391822814941
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,fp8,0,0.8770544052124023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,float16,0,1.078286361694336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,8,128,1,fp8,fp8,0,0.8715167999267578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,float16,0,1.0799200057983398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,fp8,0,9.408612823486328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,1,128,1,fp8,fp8,0,9.546965026855469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,float16,0,15.991285705566407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,float16,0,16.578201293945312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,fp8,0,9.542562866210938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,2,128,1,fp8,fp8,0,9.082608032226563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,fp8,0,10.573641967773437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,4,128,1,fp8,fp8,0,9.62377471923828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,float16,0,17.486729431152344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,fp8,0,10.496199798583984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,float16,0,15.427583312988281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,8,128,1,fp8,fp8,0,9.724996948242188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,float16,0,8.93015365600586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,fp8,0,5.558070373535156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,48,128,1,fp8,fp8,0,5.337686538696289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,float16,0,7.704236602783203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,fp8,0,4.587236785888672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,1,128,1,fp8,fp8,0,5.237115097045899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,float16,0,7.853343963623047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,fp8,0,4.675990295410156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,2,128,1,fp8,fp8,0,5.2549072265625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,fp8,0,4.498628616333008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,float16,0,8.645845031738281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,4,128,1,fp8,fp8,0,4.739126586914063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,float16,0,8.647473907470703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,float16,0,3.4964977264404298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,fp8,0,4.552164840698242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,8,128,1,fp8,fp8,0,5.191147232055664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,fp8,0,2.5988527297973634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,48,128,1,fp8,fp8,0,2.581227111816406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,float16,0,3.6366256713867187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,1,128,1,fp8,fp8,0,2.4241024017333985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,float16,0,2.9946895599365235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,fp8,0,2.2752544403076174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,fp8,0,2.3051408767700194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,2,128,1,fp8,fp8,0,2.5796064376831054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,fp8,0,2.1891551971435548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,float16,0,3.632556915283203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,4,128,1,fp8,fp8,0,2.1686847686767576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,fp8,0,2.287166404724121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,float16,0,1.5508671760559083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,float16,0,3.8321937561035155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,fp8,0,1.2642687797546386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,8,128,1,fp8,fp8,0,2.7623855590820314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,48,128,1,fp8,fp8,0,1.5521663665771483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,fp8,0,1.1081999778747558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,float16,0,2.2302175521850587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,1,128,1,fp8,fp8,0,1.2518783569335938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,float16,0,1.424131202697754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,fp8,0,1.1302016258239747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,float16,0,1.2827296257019043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,2,128,1,fp8,fp8,0,1.7466863632202148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,fp8,0,1.419320011138916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,4,128,1,fp8,fp8,0,1.1990976333618164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,float16,0,0.7183440208435059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,fp8,0,1.121895980834961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,8,128,1,fp8,fp8,0,1.1435407638549804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,float16,0,1.9047183990478516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,fp8,0,0.6418288230895997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,48,128,1,fp8,fp8,0,0.6494495868682861
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,float16,0,0.6850096225738526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,fp8,0,0.7759136199951172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,1,128,1,fp8,fp8,0,0.6089360237121582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,float16,0,0.6938720226287842
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,2,128,1,fp8,fp8,0,0.5930831909179688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,fp8,0,0.7535855770111084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,float16,0,0.7018671989440918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,fp8,0,0.5919087886810303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,4,128,1,fp8,fp8,0,0.6048160076141358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,float16,0,0.7308527946472168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,fp8,0,0.5905951976776123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,8,128,1,fp8,fp8,0,0.6025712013244628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,fp8,0,5.264508819580078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,1,128,1,fp8,fp8,0,5.237113571166992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,float16,0,9.103444671630859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,fp8,0,5.339438247680664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,2,128,1,fp8,fp8,0,5.262209701538086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,float16,0,8.24662094116211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,fp8,0,5.444715118408203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,4,128,1,fp8,fp8,0,5.312476730346679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,float16,0,9.696966552734375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,float16,0,9.16733627319336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,fp8,0,5.632702255249024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,8,128,1,fp8,fp8,0,5.556428909301758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,float16,0,5.476116943359375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,fp8,0,2.9730031967163084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,48,128,1,fp8,fp8,0,2.980254364013672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,float16,0,3.940951919555664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,fp8,0,3.1238143920898436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,1,128,1,fp8,fp8,0,2.898446464538574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,fp8,0,2.77476806640625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,float16,0,4.495636749267578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,2,128,1,fp8,fp8,0,2.577799987792969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,float16,0,4.724043273925782
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,fp8,0,2.728761672973633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,4,128,1,fp8,fp8,0,2.8711311340332033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,float16,0,3.706203079223633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,float16,0,1.8552656173706055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,8,128,1,fp8,fp8,0,2.7723600387573244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,fp8,0,3.058785629272461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,fp8,0,1.9779136657714844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,float16,0,1.4508352279663086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,48,128,1,fp8,fp8,0,1.5873567581176757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,fp8,0,1.2820752143859864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,1,128,1,fp8,fp8,0,1.3434384346008301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,fp8,0,1.27225923538208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,2,128,1,fp8,fp8,0,1.3161631584167481
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,float16,0,1.5060192108154298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,fp8,0,1.2755295753479003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,4,128,1,fp8,fp8,0,1.303617572784424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,fp8,0,1.2933247566223145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,float16,0,2.270145606994629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,8,128,1,fp8,fp8,0,1.3264687538146973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,float16,0,0.8289376258850097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,48,128,1,fp8,fp8,0,0.7534351825714112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,fp8,0,1.2841631889343261
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,float16,0,2.082744026184082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,float16,0,0.7875376224517823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,fp8,0,0.6743616104125977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,1,128,1,fp8,fp8,0,0.7476016044616699
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,float16,0,0.761952018737793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,fp8,0,0.868830394744873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,2,128,1,fp8,fp8,0,0.6733568191528321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,float16,0,0.7833920001983643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,fp8,0,0.7704224109649658
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,4,128,1,fp8,fp8,0,0.6762688159942627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,float16,0,0.7658592224121094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,fp8,0,0.7210415840148926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,8,128,1,fp8,fp8,0,0.7710768222808838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,fp8,0,0.5230688095092774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,48,128,1,fp8,fp8,0,0.4287136077880859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,float16,0,0.419265604019165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,fp8,0,0.38027520179748536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,1,128,1,fp8,fp8,0,0.3939663887023926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,float16,0,0.4193136215209961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,fp8,0,0.379313588142395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,2,128,1,fp8,fp8,0,0.37499840259552003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,float16,0,0.42218241691589353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,fp8,0,0.3791856050491333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,4,128,1,fp8,fp8,0,0.3749855995178223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,float16,0,0.4268959999084473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,fp8,0,0.3692960023880005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,8,128,1,fp8,fp8,0,0.3738159894943237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,float16,0,0.44979038238525393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,fp8,0,5.212969589233398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,1,128,1,fp8,fp8,0,5.681603240966797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,float16,0,7.680092620849609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,float16,0,7.734156799316406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,fp8,0,5.151363372802734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,2,128,1,fp8,fp8,0,5.242382431030274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,fp8,0,4.917225646972656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,float16,0,7.880289459228516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,4,128,1,fp8,fp8,0,5.494206237792969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,fp8,0,5.048046493530274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,float16,0,9.20965118408203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,float16,0,4.312823867797851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,8,128,1,fp8,fp8,0,5.436259078979492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,fp8,0,2.851799964904785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,48,128,1,fp8,fp8,0,3.0942607879638673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,fp8,0,2.576499176025391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,1,128,1,fp8,fp8,0,2.497640037536621
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,float16,0,4.62444953918457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,fp8,0,2.5056224822998048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,float16,0,4.164353561401367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,2,128,1,fp8,fp8,0,2.8002111434936525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,float16,0,3.519358444213867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,fp8,0,2.809556770324707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,4,128,1,fp8,fp8,0,2.551051139831543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,float16,0,1.594099235534668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,fp8,0,2.622670364379883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,8,128,1,fp8,fp8,0,2.680678367614746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,float16,0,3.882566452026367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,fp8,0,1.5321696281433106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,48,128,1,fp8,fp8,0,1.6176944732666017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,float16,0,1.5435711860656738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,fp8,0,1.3399248123168945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,1,128,1,fp8,fp8,0,1.2203696250915528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,2,128,1,fp8,fp8,0,1.237609577178955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,fp8,0,1.8735055923461914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,float16,0,1.3232943534851074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,fp8,0,1.2461551666259765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,4,128,1,fp8,fp8,0,1.2517919540405273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,float16,0,1.7085023880004884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,fp8,0,1.2300944328308105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,8,128,1,fp8,fp8,0,1.2568880081176759
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,float16,0,0.8870112419128418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,float16,0,1.5115936279296875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,48,128,1,fp8,fp8,0,0.8108287811279297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,fp8,0,0.6428175926208496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,float16,0,0.8439824104309082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,1,128,1,fp8,fp8,0,0.6569488048553467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,float16,0,0.7099599838256836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,fp8,0,0.8516639709472656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,2,128,1,fp8,fp8,0,0.6366767883300781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,float16,0,0.7164112091064453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,fp8,0,0.6734288215637207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,4,128,1,fp8,fp8,0,0.634332799911499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,float16,0,0.9288816452026367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,fp8,0,0.6331520080566406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,8,128,1,fp8,fp8,0,0.634825611114502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,fp8,0,0.7258272171020508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,fp8,0,0.38573760986328126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,48,128,1,fp8,fp8,0,0.38632960319519044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,float16,0,0.5481584072113037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,float16,0,0.3847775936126709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,fp8,0,0.341644811630249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,1,128,1,fp8,fp8,0,0.3444144010543823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,float16,0,0.486027193069458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,2,128,1,fp8,fp8,0,0.3423487901687622
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,float16,0,0.3812175989151001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,fp8,0,0.43424158096313475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,4,128,1,fp8,fp8,0,0.3420192003250122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,float16,0,0.3851248025894165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,fp8,0,0.4194015979766846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,8,128,1,fp8,fp8,0,0.34211039543151855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,fp8,0,0.25774240493774414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,48,128,1,fp8,fp8,0,0.21546719074249268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,fp8,0,0.19101279973983765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,1,128,1,fp8,fp8,0,0.19114400148391725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,float16,0,0.2091615915298462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,fp8,0,0.19159359931945802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,2,128,1,fp8,fp8,0,0.19118560552597047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,float16,0,0.21102240085601806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,fp8,0,0.34822559356689453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,fp8,0,0.19034080505371093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,4,128,1,fp8,fp8,0,0.19362560510635377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,float16,0,0.2148751974105835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,fp8,0,0.19339679479598998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,8,128,1,fp8,fp8,0,0.19149919748306274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,float16,0,0.23963840007781984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,float16,0,0.24052000045776367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,fp8,0,2.894220733642578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,1,128,1,fp8,fp8,0,2.963262367248535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,float16,0,3.8347023010253904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,float16,0,4.0144287109375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,fp8,0,2.8995664596557615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,2,128,1,fp8,fp8,0,3.0539648056030275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,float16,0,4.389244842529297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,fp8,0,3.415591812133789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,4,128,1,fp8,fp8,0,2.898259162902832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,float16,0,4.644697570800782
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,float16,0,2.1128463745117188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,fp8,0,2.9430992126464846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,8,128,1,fp8,fp8,0,3.1801664352416994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,float16,0,1.6011295318603516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,fp8,0,1.8438480377197266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,48,128,1,fp8,fp8,0,1.7278240203857422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,fp8,0,1.5305120468139648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,float16,0,1.578435230255127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,fp8,0,1.4594816207885741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,1,128,1,fp8,fp8,0,1.8009023666381836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,2,128,1,fp8,fp8,0,1.4795215606689454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,fp8,0,1.4732576370239259
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,4,128,1,fp8,fp8,0,1.497105598449707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,float16,0,1.6528688430786134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,float16,0,2.222371292114258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,float16,0,1.002729606628418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,8,128,1,fp8,fp8,0,1.4726143836975099
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,fp8,0,1.081110382080078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,float16,0,0.8311871528625489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,fp8,0,0.7819407939910888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,48,128,1,fp8,fp8,0,1.1489456176757813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,1,128,1,fp8,fp8,0,1.0850655555725097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,float16,0,1.0244367599487305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,fp8,0,0.7615520000457764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,2,128,1,fp8,fp8,0,0.7667967796325683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,fp8,0,1.4677632331848145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,float16,0,0.8300944328308105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,fp8,0,0.7620463848114014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,4,128,1,fp8,fp8,0,0.7479248046875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,fp8,0,0.7629487991333008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,8,128,1,fp8,fp8,0,0.7468463897705078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,float16,0,0.5228271961212159
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,fp8,0,0.46131038665771484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,48,128,1,fp8,fp8,0,0.4801072120666504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,float16,0,0.4291776180267334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,fp8,0,0.4078351974487305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,1,128,1,fp8,fp8,0,0.39659359455108645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,fp8,0,0.3959696054458618
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,2,128,1,fp8,fp8,0,0.3997776031494141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,float16,0,0.42946720123291016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,fp8,0,0.3982815980911255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,4,128,1,fp8,fp8,0,0.3945775985717773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,float16,0,1.0052127838134766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,float16,0,0.44133601188659666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,fp8,0,0.39593279361724854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,8,128,1,fp8,fp8,0,0.40015997886657717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,float16,0,0.2762752056121826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,fp8,0,0.25129759311676025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,48,128,1,fp8,fp8,0,0.2542736053466797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,float16,0,0.23557600975036622
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,fp8,0,0.21722400188446045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,1,128,1,fp8,fp8,0,0.21927518844604493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,float16,0,0.23615520000457763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,2,128,1,fp8,fp8,0,0.21936800479888915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,float16,0,0.23703200817108155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,fp8,0,0.21949760913848876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,4,128,1,fp8,fp8,0,0.2168816089630127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,float16,0,0.43999199867248534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,float16,0,0.24375839233398439
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,fp8,0,0.2170624017715454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,float16,0,0.1571760058403015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,fp8,0,0.14248640537261964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,48,128,1,fp8,fp8,0,0.14407039880752565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,float16,0,0.13259040117263793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,fp8,0,0.12468960285186767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,1,128,1,fp8,fp8,0,0.12338240146636963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,float16,0,0.1341007947921753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,2,128,1,fp8,fp8,0,0.12441279888153076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,float16,0,0.13536479473114013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,fp8,0,0.12510080337524415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,4,128,1,fp8,fp8,0,0.1253615975379944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,float16,0,0.13822879791259765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,fp8,0,0.12523679733276366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,8,128,1,fp8,fp8,0,0.12530399560928346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,8,128,1,fp8,fp8,0,0.22117600440979004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,fp8,0,0.12354079484939576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,fp8,0,0.2175503969192505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,float16,0,3.7716014862060545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,fp8,0,2.9312400817871094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,1,128,1,fp8,fp8,0,2.914255905151367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,float16,0,3.918796920776367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,fp8,0,3.117892837524414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,float16,0,4.391828918457032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,fp8,0,2.9086896896362306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,4,128,1,fp8,fp8,0,2.9361568450927735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,2,128,1,fp8,fp8,0,3.018462371826172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,float16,0,2.048214340209961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,fp8,0,2.915135955810547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,8,128,1,fp8,fp8,0,2.942163276672363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,float16,0,4.795129776000977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,fp8,0,1.9788415908813477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,float16,0,1.5637791633605957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,48,128,1,fp8,fp8,0,1.863684844970703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,fp8,0,1.5983327865600585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,1,128,1,fp8,fp8,0,1.5289711952209473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,fp8,0,1.4747535705566406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,2,128,1,fp8,fp8,0,1.4939104080200196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,float16,0,1.8034671783447265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,fp8,0,1.471928024291992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,4,128,1,fp8,fp8,0,1.4736672401428224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,fp8,0,1.4690431594848632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,8,128,1,fp8,fp8,0,1.4704591751098632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,float16,0,1.8282800674438477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,float16,0,2.3218095779418944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,float16,0,1.0184639930725097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,48,128,1,fp8,fp8,0,0.9417951583862305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,float16,0,0.9815728187561035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,fp8,0,0.9608112335205078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,1,128,1,fp8,fp8,0,0.7662240028381347
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,float16,0,1.0248784065246581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,fp8,0,0.7532944202423095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,2,128,1,fp8,fp8,0,0.7645408153533936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,float16,0,1.0157872200012208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,fp8,0,0.751587200164795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,4,128,1,fp8,fp8,0,0.7621503829956054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,fp8,0,0.9519519805908203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,fp8,0,0.7532976150512696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,float16,0,1.0525424003601074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,8,128,1,fp8,fp8,0,0.7594528198242188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,float16,0,0.5294784069061279
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,fp8,0,0.48253440856933594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,float16,0,0.42144160270690917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,48,128,1,fp8,fp8,0,0.6331520080566406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,fp8,0,0.39325919151306155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,1,128,1,fp8,fp8,0,0.3989504098892212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,float16,0,0.6203680038452148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,fp8,0,0.39307680130004885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,2,128,1,fp8,fp8,0,0.398086404800415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,fp8,0,0.39210240840911864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,float16,0,0.5564879894256591
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,4,128,1,fp8,fp8,0,0.39862399101257323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,float16,0,0.4278656005859375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,fp8,0,0.482692813873291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,8,128,1,fp8,fp8,0,0.3960063934326172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,float16,0,0.2787247896194458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,fp8,0,0.2678447961807251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,48,128,1,fp8,fp8,0,0.25852160453796386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,float16,0,0.22584159374237062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,fp8,0,0.21456000804901124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,1,128,1,fp8,fp8,0,0.21415040493011475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,float16,0,0.22507359981536865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,fp8,0,0.21487679481506347
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,2,128,1,fp8,fp8,0,0.2143183946609497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,float16,0,0.22626080513000488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,fp8,0,0.21430559158325196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,4,128,1,fp8,fp8,0,0.21436800956726074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,fp8,0,0.2133631944656372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,float16,0,0.15770879983901978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,fp8,0,0.1437824010848999
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,48,128,1,fp8,fp8,0,0.14408479928970336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,float16,0,0.12727839946746827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,1,128,1,fp8,fp8,0,0.11903519630432129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,float16,0,0.1281231999397278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,fp8,0,0.11863520145416259
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,2,128,1,fp8,fp8,0,0.1189695954322815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,float16,0,0.12924799919128419
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,fp8,0,0.11843520402908325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,4,128,1,fp8,fp8,0,0.11832640171051026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,float16,0,0.13182400465011596
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,float16,0,0.2303567886352539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,8,128,1,fp8,fp8,0,0.11759999990463257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,8,128,1,fp8,fp8,0,0.21511518955230713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,float16,0,0.0908959984779358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,fp8,0,0.08333439826965332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,48,128,1,fp8,fp8,0,0.08269280195236206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,float16,0,0.07559999823570251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,fp8,0,0.07090880274772644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,1,128,1,fp8,fp8,0,0.07226719856262206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,float16,0,0.07480319738388061
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,fp8,0,0.07161759734153747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,2,128,1,fp8,fp8,0,0.07151039838790893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,float16,0,0.07642560005187989
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,fp8,0,0.0715120017528534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,4,128,1,fp8,fp8,0,0.07197759747505188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,float16,0,0.07622560262680053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,fp8,0,0.07132800221443177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,8,128,1,fp8,fp8,0,0.07108640074729919
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,fp8,0,0.11825759410858154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,fp8,0,1.852020835876465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,fp8,0,0.11712160110473632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,1,128,1,fp8,fp8,0,1.8623392105102539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,float16,0,1.8639936447143555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,fp8,0,1.8525472640991212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,2,128,1,fp8,fp8,0,1.8601119995117188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,float16,0,2.0680688858032226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,float16,0,2.5656848907470704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,fp8,0,1.8495792388916015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,4,128,1,fp8,fp8,0,1.859071922302246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,float16,0,1.9879823684692384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,float16,0,1.3388400077819824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,8,128,1,fp8,fp8,0,1.8502031326293946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,fp8,0,1.4992575645446777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,float16,0,0.967465591430664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,48,128,1,fp8,fp8,0,1.3856800079345704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,fp8,0,0.9531503677368164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,1,128,1,fp8,fp8,0,1.1426992416381836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,float16,0,0.9919407844543457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,fp8,0,0.9527376174926758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,fp8,0,1.8468048095703125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,2,128,1,fp8,fp8,0,0.9412976264953613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,float16,0,1.1049872398376466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,fp8,0,0.9507984161376953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,4,128,1,fp8,fp8,0,0.9404576301574707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,float16,0,1.006657600402832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,8,128,1,fp8,fp8,0,0.9452063560485839
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,fp8,0,1.420910358428955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,float16,0,0.671772813796997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,fp8,0,0.7730000019073486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,48,128,1,fp8,fp8,0,0.6237872123718262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,float16,0,0.585268783569336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,fp8,0,0.4868783950805664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,1,128,1,fp8,fp8,0,0.4877615928649902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,float16,0,0.508243179321289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,fp8,0,0.5045263767242432
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,2,128,1,fp8,fp8,0,0.48518080711364747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,float16,0,0.5103456020355225
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,fp8,0,0.48974080085754396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,4,128,1,fp8,fp8,0,0.5259103775024414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,float16,0,0.5203711986541748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,fp8,0,0.5161920070648194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,float16,0,0.3600480079650879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,fp8,0,0.32272160053253174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,48,128,1,fp8,fp8,0,0.34495840072631834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,float16,0,0.2660655975341797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,fp8,0,0.26006240844726564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,1,128,1,fp8,fp8,0,0.2560784101486206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,float16,0,0.27079360485076903
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,fp8,0,0.2554816007614136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,2,128,1,fp8,fp8,0,0.2591104030609131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,float16,0,0.26954400539398193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,fp8,0,0.2590912103652954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,4,128,1,fp8,fp8,0,0.2550816059112549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,float16,0,0.2812623977661133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,fp8,0,0.25401599407196046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,8,128,1,fp8,fp8,0,0.25772318840026853
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,float16,0,0.18880800008773804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,fp8,0,0.17587679624557495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,float16,0,0.14948159456253052
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,fp8,0,0.14059200286865234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,1,128,1,fp8,fp8,0,0.14025119543075562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,float16,0,0.14932960271835327
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,fp8,0,0.14078400135040284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,2,128,1,fp8,fp8,0,0.14032959938049316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,float16,0,0.15057599544525146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,4,128,1,fp8,fp8,0,0.14088959693908693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,fp8,0,0.13929760456085205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,float16,0,0.1543344020843506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,fp8,0,0.1395807981491089
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,8,128,1,fp8,fp8,0,0.14143520593643188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,float16,0,0.10527839660644531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,fp8,0,0.09932479858398438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,float16,0,0.08317760229110718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,fp8,0,0.08026080131530762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,1,128,1,fp8,fp8,0,0.08058720231056213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,48,128,1,fp8,fp8,0,0.17392319440841675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,float16,0,0.08370559811592101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,fp8,0,0.07884160280227662
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,2,128,1,fp8,fp8,0,0.08013439774513245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,float16,0,0.08342400193214417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,fp8,0,0.07993280291557311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,4,128,1,fp8,fp8,0,0.07915359735488892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,float16,0,0.08681759834289551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,fp8,0,0.07929279804229736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,float16,0,0.06047359704971313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,fp8,0,0.05887519717216492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,48,128,1,fp8,fp8,0,0.05847679972648621
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,float16,0,0.05215680003166199
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,fp8,0,0.04997600018978119
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,1,128,1,fp8,fp8,0,0.04999360144138336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,float16,0,0.051734399795532224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,fp8,0,0.05024960041046143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,2,128,1,fp8,fp8,0,0.05033119916915894
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,48,128,1,fp8,fp8,0,0.09849119782447815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,float16,0,0.05304800271987915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,fp8,0,0.049779200553894044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,4,128,1,fp8,fp8,0,0.050672000646591185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,8,128,1,fp8,fp8,0,0.4901167869567871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,fp8,0,0.05033439993858337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,8,128,1,fp8,fp8,0,0.05003359913825989
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,8,128,1,fp8,fp8,0,0.08005599975585938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,float16,0,1.973539161682129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,fp8,0,2.0266319274902345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,1,128,1,fp8,fp8,0,2.0160991668701174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,float16,0,0.05334399938583374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,float16,0,2.2190927505493163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,2,128,1,fp8,fp8,0,2.012071990966797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,float16,0,2.167089653015137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,fp8,0,2.0362720489501953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,4,128,1,fp8,fp8,0,2.00928955078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,fp8,0,2.0215007781982424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,float16,0,2.767380714416504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,fp8,0,1.9988048553466797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,8,128,1,fp8,fp8,0,2.008483123779297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,float16,0,1.5036640167236328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,fp8,0,1.3730031967163085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,48,128,1,fp8,fp8,0,1.376315212249756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,fp8,0,1.1318320274353026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,1,128,1,fp8,fp8,0,1.0193360328674317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,float16,0,1.0047871589660644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,fp8,0,1.0251248359680176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,2,128,1,fp8,fp8,0,1.0159199714660645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,float16,0,1.1675567626953125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,fp8,0,1.021190357208252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,4,128,1,fp8,fp8,0,1.0136048316955566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,float16,0,1.0355648040771483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,float16,0,1.1078224182128906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,fp8,0,1.0193056106567382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,8,128,1,fp8,fp8,0,1.0110032081604003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,float16,0,0.7654655933380127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,fp8,0,0.80862398147583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,float16,0,0.5244607925415039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,48,128,1,fp8,fp8,0,0.7023680210113525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,fp8,0,0.5267712116241455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,1,128,1,fp8,fp8,0,0.521233606338501
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,float16,0,0.5470223903656006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,fp8,0,0.5267519950866699
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,2,128,1,fp8,fp8,0,0.5237328052520752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,float16,0,0.5284527778625489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,4,128,1,fp8,fp8,0,0.5259424209594726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,float16,0,0.5600751876831055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,fp8,0,0.5158224105834961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,8,128,1,fp8,fp8,0,0.5216512203216552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,float16,0,0.38979520797729494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,fp8,0,0.36232800483703614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,float16,0,0.2775984048843384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,48,128,1,fp8,fp8,0,0.36059999465942383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,fp8,0,0.2739919900894165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,1,128,1,fp8,fp8,0,0.2709439992904663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,float16,0,0.2778496026992798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,fp8,0,0.27024641036987307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,2,128,1,fp8,fp8,0,0.27451200485229493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,float16,0,0.27854559421539304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,fp8,0,0.272870397567749
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,4,128,1,fp8,fp8,0,0.27005600929260254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,fp8,0,0.6380576133728028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,float16,0,0.2923583984375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,8,128,1,fp8,fp8,0,0.268723201751709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,float16,0,0.20701279640197753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,48,128,1,fp8,fp8,0,0.193505597114563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,float16,0,0.15052160024642944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,fp8,0,0.14669599533081054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,1,128,1,fp8,fp8,0,0.14715039730072021
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,float16,0,0.15056960582733153
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,fp8,0,0.14689760208129882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,2,128,1,fp8,fp8,0,0.1477679967880249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,float16,0,0.15327680110931396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,fp8,0,0.14776320457458497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,4,128,1,fp8,fp8,0,0.14749120473861693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,float16,0,0.1580575942993164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,fp8,0,0.14684159755706788
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,8,128,1,fp8,fp8,0,0.14606239795684814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,fp8,0,0.19121919870376586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,float16,0,0.11521600484848023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,fp8,0,0.10604000091552734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,48,128,1,fp8,fp8,0,0.10488959550857543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,float16,0,0.08424320220947265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,fp8,0,0.08091679811477662
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,1,128,1,fp8,fp8,0,0.0800704002380371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,float16,0,0.08448479771614074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,fp8,0,0.0814512014389038
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,2,128,1,fp8,fp8,0,0.07969440221786499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,float16,0,0.08655200004577637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,fp8,0,0.08166400194168091
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,float16,0,0.08967199921607971
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,fp8,0,0.08237599730491638
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,float16,0,0.06756160259246827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,fp8,0,0.06038879752159119
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,48,128,1,fp8,fp8,0,0.06015840172767639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,float16,0,0.049876800179481505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,fp8,0,0.049456000328063965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,1,128,1,fp8,fp8,0,0.04952639937400818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,float16,0,0.04981760084629059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,fp8,0,0.049623998999595645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,fp8,0,0.27023360729217527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,2,128,1,fp8,fp8,0,0.049572798609733584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,float16,0,0.051209598779678345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,fp8,0,0.049851199984550475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,4,128,1,fp8,fp8,0,0.04952960014343262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,float16,0,0.051875197887420656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,fp8,0,0.04912000000476837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,float16,0,0.03930880129337311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,fp8,0,0.039243200421333314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,48,128,1,fp8,fp8,0,0.03922399878501892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,4,128,1,fp8,fp8,0,0.07971360087394715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,float16,0,0.0352975994348526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,fp8,0,0.03308799862861633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,8,128,1,fp8,fp8,0,0.08096479773521423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,1,128,1,fp8,fp8,0,0.035041600465774536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,float16,0,0.035123199224472046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,fp8,0,0.03504000008106232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,2,128,1,fp8,fp8,0,0.03517920076847077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,float16,0,0.035148799419403076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,fp8,0,0.03504959940910339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,4,128,1,fp8,fp8,0,0.03519200086593628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,fp8,0,0.03495039939880371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,8,128,1,fp8,fp8,0,0.034939199686050415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,1,128,1,float16,float16,0,1.4880319595336915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,1,128,1,float16,fp8,0,1.5460783958435058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,8,128,1,fp8,fp8,0,0.04961119890213013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,1,128,1,fp8,fp8,0,1.6048784255981445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,float16,0,0.03527680039405823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,2,128,1,float16,float16,0,1.4674367904663086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,2,128,1,float16,fp8,0,1.557595157623291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,2,128,1,fp8,fp8,0,1.549783992767334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,4,128,1,float16,float16,0,1.5101823806762695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,4,128,1,float16,fp8,0,1.655891227722168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,4,128,1,fp8,fp8,0,1.5554544448852539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,8,128,1,float16,float16,0,1.5992992401123047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,8,128,1,float16,fp8,0,1.5758079528808593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,8,128,1,fp8,fp8,0,1.5369759559631349
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,1,128,1,float16,float16,0,0.749560022354126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,48,128,1,fp8,fp8,0,1.1340031623840332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,1,128,1,float16,fp8,0,0.7842031955718994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,1,128,1,fp8,fp8,0,0.7847248077392578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,2,128,1,float16,float16,0,0.7467391967773438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,2,128,1,float16,fp8,0,0.7829535961151123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,48,128,1,float16,float16,0,1.2221728324890138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,2,128,1,fp8,fp8,0,0.7821792125701904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,4,128,1,float16,float16,0,0.768225622177124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,4,128,1,float16,fp8,0,0.7873072147369384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,48,128,1,float16,fp8,0,1.256935977935791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,4,128,1,fp8,fp8,0,0.7802608013153076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,8,128,1,float16,float16,0,0.8046064376831055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,8,128,1,float16,fp8,0,0.7804975986480713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,8,128,1,fp8,fp8,0,0.7782368183135986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,48,128,1,float16,float16,0,0.6798704147338868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,1,128,1,float16,float16,0,0.3938767910003662
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,48,128,1,fp8,fp8,0,0.5776768207550049
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,1,128,1,float16,fp8,0,0.40195522308349607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,1,128,1,fp8,fp8,0,0.4010655879974365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,2,128,1,float16,float16,0,0.387224006652832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,2,128,1,float16,fp8,0,0.40032320022583007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,2,128,1,fp8,fp8,0,0.4019968032836914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,4,128,1,float16,float16,0,0.3948319911956787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,4,128,1,float16,fp8,0,0.4003456115722656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,4,128,1,fp8,fp8,0,0.39957919120788576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,8,128,1,float16,float16,0,0.41560959815979004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,8,128,1,float16,fp8,0,0.39830400943756106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,8,128,1,fp8,fp8,0,0.39877920150756835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,48,128,1,float16,float16,0,0.31796319484710694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,48,128,1,float16,fp8,0,0.29817919731140136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,48,128,1,fp8,fp8,0,0.29890880584716795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,1,128,1,float16,float16,0,0.20477440357208251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,1,128,1,fp8,fp8,0,0.20881919860839843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,2,128,1,float16,float16,0,0.20554718971252442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,1,128,1,float16,fp8,0,0.21120960712432862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,2,128,1,float16,fp8,0,0.20930080413818358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,2,128,1,fp8,fp8,0,0.21274240016937257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,4,128,1,float16,float16,0,0.21101601123809816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,4,128,1,float16,fp8,0,0.20802080631256104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,4,128,1,fp8,fp8,0,0.20921599864959717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,8,128,1,float16,float16,0,0.2213536024093628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,8,128,1,float16,fp8,0,0.20804638862609864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,8,128,1,fp8,fp8,0,0.20759520530700684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,48,128,1,float16,float16,0,0.16946079730987548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,48,128,1,float16,fp8,0,0.15805280208587646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,48,128,1,fp8,fp8,0,0.15678240060806276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,1,128,1,float16,fp8,0,0.11239360570907593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,1,128,1,fp8,fp8,0,0.11198400259017945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,2,128,1,float16,float16,0,0.11267999410629273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,2,128,1,fp8,fp8,0,0.11192799806594848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,48,128,1,float16,fp8,0,0.5791759967803956
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,4,128,1,float16,float16,0,0.1129871964454651
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,4,128,1,float16,fp8,0,0.11344799995422364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,4,128,1,fp8,fp8,0,0.11240320205688477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,8,128,1,float16,fp8,0,0.11106239557266236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,8,128,1,fp8,fp8,0,0.11162400245666504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,48,128,1,float16,float16,0,0.09227200150489807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,48,128,1,float16,fp8,0,0.08624479770660401
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,48,128,1,fp8,fp8,0,0.08610399961471557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,1,128,1,float16,float16,0,0.060835200548172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,1,128,1,float16,fp8,0,0.06169599890708923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,1,128,1,fp8,fp8,0,0.061382400989532473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,2,128,1,float16,float16,0,0.061787199974060056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,2,128,1,float16,fp8,0,0.06179680228233338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,2,128,1,fp8,fp8,0,0.06175360083580017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,2,128,1,float16,fp8,0,0.11264159679412841
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,4,128,1,float16,float16,0,0.06371359825134278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,4,128,1,float16,fp8,0,0.061737602949142455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,4,128,1,fp8,fp8,0,0.061857599020004275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,8,128,1,float16,float16,0,0.06649439930915832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,8,128,1,float16,fp8,0,0.0620464026927948
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,8,128,1,fp8,fp8,0,0.06196799874305725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,8,128,1,float16,float16,0,0.12044800519943237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,48,128,1,float16,fp8,0,0.049497601389884946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,48,128,1,fp8,fp8,0,0.04936479926109314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,1,128,1,float16,float16,0,0.0372624009847641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,1,128,1,float16,fp8,0,0.03864319920539856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,1,128,1,fp8,fp8,0,0.03728640079498291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,2,128,1,float16,float16,0,0.038899201154708865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,2,128,1,float16,fp8,0,0.03722560107707977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,4,128,1,float16,float16,0,0.038038399815559384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,4,128,1,float16,fp8,0,0.03717760145664215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,4,128,1,fp8,fp8,0,0.03723520040512085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,8,128,1,float16,float16,0,0.03925440013408661
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,8,128,1,float16,fp8,0,0.037206399440765384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,8,128,1,fp8,fp8,0,0.03720960021018982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,48,128,1,float16,float16,0,0.03041279911994934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,48,128,1,float16,fp8,0,0.031116798520088196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,1,128,1,float16,float16,0,0.1123568058013916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,48,128,1,fp8,fp8,0,0.03248000144958496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,1,128,1,float16,fp8,0,0.026876801252365114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,1,128,1,fp8,fp8,0,0.027009600400924684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,2,128,1,float16,float16,0,0.02698880136013031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,2,128,1,float16,fp8,0,0.026867198944091796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,2,128,1,fp8,fp8,0,0.026897600293159483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,48,128,1,float16,float16,0,0.0541055977344513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,4,128,1,float16,float16,0,0.02696479856967926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,4,128,1,float16,fp8,0,0.02705279886722565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,4,128,1,fp8,fp8,0,0.027088001370429993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,8,128,1,float16,float16,0,0.027033600211143493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,8,128,1,float16,fp8,0,0.02699039876461029
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,8,128,1,fp8,fp8,0,0.02757439911365509
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,2,128,1,fp8,fp8,0,0.03825120031833649
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,48,128,1,float16,fp8,0,0.02078399956226349
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,48,128,1,fp8,fp8,0,0.020776000618934632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,1,128,1,float16,fp8,0,0.018755200505256652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,1,128,1,float16,float16,0,0.01870879977941513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,1,128,1,fp8,fp8,0,0.018665599822998046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,2,128,1,float16,float16,0,0.01875839978456497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,2,128,1,float16,fp8,0,0.018671999871730804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,2,128,1,fp8,fp8,0,0.01865919977426529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,4,128,1,float16,float16,0,0.018783999979496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,4,128,1,float16,fp8,0,0.01882079988718033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,4,128,1,fp8,fp8,0,0.018795199692249298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,8,128,1,float16,float16,0,0.01873439997434616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,8,128,1,float16,fp8,0,0.018822400271892546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,8,128,1,fp8,fp8,0,0.018641600012779237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,1,128,1,float16,float16,0,0.027091199159622194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,1,128,1,float16,float16,0,0.6176320075988769
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,1,128,1,float16,fp8,0,0.6635424137115479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,48,128,1,float16,float16,0,0.020771199464797975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,1,128,1,fp8,fp8,0,0.659881591796875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,2,128,1,float16,float16,0,0.6156576156616211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,2,128,1,float16,fp8,0,0.6557456016540527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,2,128,1,fp8,fp8,0,0.6579520225524902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,4,128,1,float16,float16,0,0.6339551925659179
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,4,128,1,float16,fp8,0,0.6544479846954345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,4,128,1,fp8,fp8,0,0.6562863826751709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,8,128,1,float16,float16,0,0.6717648029327392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,8,128,1,float16,fp8,0,0.6527408123016357
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,8,128,1,fp8,fp8,0,0.6557024002075196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,48,128,1,float16,float16,0,0.5469999790191651
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,48,128,1,float16,fp8,0,0.5100687980651856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,48,128,1,fp8,fp8,0,0.5132575988769531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,1,128,1,float16,float16,0,0.31901280879974364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,1,128,1,float16,fp8,0,0.33513760566711426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,1,128,1,fp8,fp8,0,0.340339207649231
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,2,128,1,float16,float16,0,0.3128112077713013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,2,128,1,float16,fp8,0,0.34041759967803953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,2,128,1,fp8,fp8,0,0.33492159843444824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,4,128,1,float16,fp8,0,0.3339263916015625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,8,128,1,float16,fp8,0,0.33372480869293214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,8,128,1,fp8,fp8,0,0.3338223934173584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,48,128,1,float16,float16,0,0.2800096035003662
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,48,128,1,float16,fp8,0,0.2640575885772705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,48,128,1,fp8,fp8,0,0.26440160274505614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,1,128,1,float16,float16,0,0.16699999570846558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,4,128,1,float16,float16,0,0.32828640937805176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,1,128,1,float16,fp8,0,0.17653599977493287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,1,128,1,fp8,fp8,0,0.17931360006332397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,2,128,1,float16,float16,0,0.16684319972991943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,8,128,1,float16,float16,0,0.3415424108505249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,2,128,1,float16,fp8,0,0.17887840270996094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,2,128,1,fp8,fp8,0,0.1756384015083313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,4,128,1,float16,float16,0,0.17076799869537354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,4,128,1,float16,fp8,0,0.1782304048538208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,4,128,1,fp8,fp8,0,0.1761199951171875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,8,128,1,float16,float16,0,0.17927520275115966
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,8,128,1,float16,fp8,0,0.17692480087280274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,8,128,1,fp8,fp8,0,0.17580640316009521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,48,128,1,float16,fp8,0,0.1420688033103943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,1,128,1,float16,float16,0,0.09481599926948547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,48,128,1,fp8,fp8,0,0.14030239582061768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,1,128,1,float16,fp8,0,0.09703999757766724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,1,128,1,fp8,fp8,0,0.09651679992675781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,2,128,1,float16,float16,0,0.09416000247001648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,2,128,1,float16,fp8,0,0.0960864007472992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,2,128,1,fp8,fp8,0,0.09664959907531738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,4,128,1,float16,float16,0,0.09472960233688354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,4,128,1,float16,fp8,0,0.09681439995765687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,8,128,1,float16,float16,0,0.10080480575561523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,8,128,1,float16,fp8,0,0.096697598695755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,8,128,1,fp8,fp8,0,0.09666240215301514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,48,128,1,float16,float16,0,0.0839407980442047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,48,128,1,float16,fp8,0,0.07848640084266663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,48,128,1,fp8,fp8,0,0.07952479720115661
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,4,128,1,fp8,fp8,0,0.3382623910903931
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,1,128,1,float16,fp8,0,0.05400639772415161
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,1,128,1,fp8,fp8,0,0.053788799047470096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,2,128,1,float16,float16,0,0.05402719974517822
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,2,128,1,float16,fp8,0,0.055604797601699826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,48,128,1,float16,float16,0,0.1483791947364807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,2,128,1,fp8,fp8,0,0.055491197109222415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,4,128,1,float16,float16,0,0.05432800054550171
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,4,128,1,float16,fp8,0,0.055508798360824584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,8,128,1,float16,float16,0,0.05804160237312317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,8,128,1,float16,fp8,0,0.055667197704315184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,8,128,1,fp8,fp8,0,0.055606400966644286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,4,128,1,fp8,fp8,0,0.09587200284004212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,48,128,1,float16,fp8,0,0.04327200055122375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,48,128,1,fp8,fp8,0,0.04343520104885101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,1,128,1,float16,float16,0,0.031007999181747438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,1,128,1,float16,fp8,0,0.03110400140285492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,1,128,1,fp8,fp8,0,0.030987200140953065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,1,128,1,float16,float16,0,0.053660798072814944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,2,128,1,float16,fp8,0,0.031070399284362792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,2,128,1,fp8,fp8,0,0.03144319951534271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,4,128,1,float16,float16,0,0.031115201115608216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,4,128,1,float16,fp8,0,0.031057599186897277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,4,128,1,fp8,fp8,0,0.031052801012992858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,8,128,1,float16,float16,0,0.03290719985961914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,8,128,1,float16,fp8,0,0.031043198704719544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,8,128,1,fp8,fp8,0,0.03120799958705902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,4,128,1,fp8,fp8,0,0.053547197580337526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,48,128,1,float16,float16,0,0.026977598667144775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,48,128,1,float16,fp8,0,0.029051199555397034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,48,128,1,fp8,fp8,0,0.029016000032424927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,1,128,1,float16,float16,0,0.02281759977340698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,1,128,1,float16,fp8,0,0.02284640073776245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,1,128,1,fp8,fp8,0,0.02300640046596527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,2,128,1,float16,float16,0,0.022865599393844603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,48,128,1,float16,float16,0,0.04749279916286468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,2,128,1,float16,fp8,0,0.02340639978647232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,2,128,1,fp8,fp8,0,0.02298240065574646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,4,128,1,float16,float16,0,0.022998400032520294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,4,128,1,float16,fp8,0,0.022939200699329376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,4,128,1,fp8,fp8,0,0.022811199724674224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,8,128,1,float16,float16,0,0.023187200725078582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,8,128,1,float16,fp8,0,0.02292319983243942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,8,128,1,fp8,fp8,0,0.023028799891471864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,48,128,1,float16,float16,0,0.01887200027704239
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,48,128,1,float16,fp8,0,0.0188400000333786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,48,128,1,fp8,fp8,0,0.01876319944858551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,2,128,1,float16,float16,0,0.031038400530815125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,1,128,1,float16,float16,0,0.016710400581359863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,1,128,1,float16,fp8,0,0.01736319959163666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,1,128,1,fp8,fp8,0,0.01642560064792633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,2,128,1,float16,fp8,0,0.016711999475955964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,2,128,1,fp8,fp8,0,0.016734400391578676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,4,128,1,float16,float16,0,0.016787199676036833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,4,128,1,float16,fp8,0,0.016816000640392303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,4,128,1,fp8,fp8,0,0.016705599427223206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,8,128,1,float16,fp8,0,0.01671359986066818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,8,128,1,float16,float16,0,0.016705599427223206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,8,128,1,fp8,fp8,0,0.016806399822235106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,48,128,1,float16,fp8,0,0.016832000017166136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,48,128,1,fp8,fp8,0,0.01679839938879013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,1,128,1,float16,float16,0,0.014742399752140044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,1,128,1,float16,fp8,0,0.01483680009841919
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,1,128,1,fp8,fp8,0,0.014761599898338317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,2,128,1,float16,float16,0,0.01467519998550415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,2,128,1,float16,fp8,0,0.014744000136852264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,2,128,1,fp8,fp8,0,0.014833599328994751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,4,128,1,float16,float16,0,0.014822399616241455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,4,128,1,float16,fp8,0,0.0147599995136261
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,4,128,1,fp8,fp8,0,0.014744000136852264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,2,128,1,float16,float16,0,0.016654400527477263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,8,128,1,float16,float16,0,0.014950400590896607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,8,128,1,float16,fp8,0,0.014742399752140044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,8,128,1,fp8,fp8,0,0.014711999893188476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,1,128,1,float16,float16,0,0.3767616033554077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,1,128,1,float16,fp8,0,0.39693119525909426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,1,128,1,fp8,fp8,0,0.3994992017745972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,2,128,1,float16,float16,0,0.3771872043609619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,2,128,1,float16,fp8,0,0.395849609375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,2,128,1,fp8,fp8,0,0.39809279441833495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,4,128,1,float16,float16,0,0.3861407995223999
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,4,128,1,float16,fp8,0,0.39484639167785646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,4,128,1,fp8,fp8,0,0.3970367908477783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,8,128,1,float16,float16,0,0.4046976089477539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,8,128,1,float16,fp8,0,0.39392800331115724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,48,128,1,float16,float16,0,0.30658879280090334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,48,128,1,float16,fp8,0,0.29007840156555176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,1,128,1,float16,float16,0,0.19334880113601685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,48,128,1,fp8,fp8,0,0.2936079978942871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,1,128,1,float16,fp8,0,0.20338239669799804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,1,128,1,fp8,fp8,0,0.20610239505767822
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,2,128,1,float16,float16,0,0.19334720373153685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,2,128,1,float16,fp8,0,0.2053999900817871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,48,128,1,float16,float16,0,0.01674560010433197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,4,128,1,float16,float16,0,0.20052640438079833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,4,128,1,float16,fp8,0,0.2021791934967041
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,4,128,1,fp8,fp8,0,0.20546400547027588
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,8,128,1,float16,float16,0,0.2066864013671875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,8,128,1,float16,fp8,0,0.2053920030593872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,8,128,1,fp8,fp8,0,0.2030639886856079
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,48,128,1,float16,float16,0,0.1588495969772339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,48,128,1,float16,fp8,0,0.1519871950149536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,48,128,1,fp8,fp8,0,0.1536080002784729
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,1,128,1,float16,float16,0,0.10452320575714111
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,1,128,1,float16,fp8,0,0.10925600528717042
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,1,128,1,fp8,fp8,0,0.10828640460968017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,2,128,1,float16,float16,0,0.10527520179748535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,2,128,1,float16,fp8,0,0.10815680027008057
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,4,128,1,float16,float16,0,0.10648479461669921
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,4,128,1,float16,fp8,0,0.10814720392227173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,4,128,1,fp8,fp8,0,0.10792640447616578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,8,128,1,float16,float16,0,0.11053919792175293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,8,128,1,float16,fp8,0,0.10764000415802003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,8,128,1,fp8,fp8,0,0.10701600313186646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,2,128,1,fp8,fp8,0,0.2032543897628784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,48,128,1,float16,float16,0,0.08438720107078553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,48,128,1,float16,fp8,0,0.08226079940795898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,48,128,1,fp8,fp8,0,0.0822928011417389
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,1,128,1,float16,float16,0,0.05760319828987122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,1,128,1,fp8,fp8,0,0.05899199843406677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,2,128,1,float16,float16,0,0.05795360207557678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,2,128,1,float16,fp8,0,0.057766401767730714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,2,128,1,fp8,fp8,0,0.0576911985874176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,4,128,1,float16,float16,0,0.0595088005065918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,4,128,1,float16,fp8,0,0.05772320032119751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,8,128,1,fp8,fp8,0,0.39644479751586914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,4,128,1,fp8,fp8,0,0.058736002445220946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,8,128,1,float16,float16,0,0.062115198373794554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,2,128,1,fp8,fp8,0,0.10920000076293945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,8,128,1,fp8,fp8,0,0.05961599946022034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,48,128,1,float16,float16,0,0.05018879771232605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,48,128,1,float16,fp8,0,0.04673439860343933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,48,128,1,fp8,fp8,0,0.04731999933719635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,1,128,1,float16,float16,0,0.03469119966030121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,1,128,1,float16,fp8,0,0.0351967990398407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,1,128,1,fp8,fp8,0,0.03512639999389648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,2,128,1,float16,float16,0,0.03518719971179962
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,2,128,1,float16,fp8,0,0.03516960144042969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,2,128,1,fp8,fp8,0,0.035175999999046324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,4,128,1,float16,float16,0,0.03514559864997864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,1,128,1,float16,fp8,0,0.05972319841384888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,4,128,1,float16,fp8,0,0.035150399804115294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,4,128,1,fp8,fp8,0,0.03516159951686859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,8,128,1,float16,float16,0,0.03650560081005096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,8,128,1,float16,fp8,0,0.035158398747444156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,8,128,1,fp8,fp8,0,0.03519999980926514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,48,128,1,float16,float16,0,0.024940800666809083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,48,128,1,float16,fp8,0,0.02696160078048706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,48,128,1,fp8,fp8,0,0.027075201272964478
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,1,128,1,float16,float16,0,0.021087999641895293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,1,128,1,float16,fp8,0,0.021955199539661407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,1,128,1,fp8,fp8,0,0.022526399791240694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,2,128,1,float16,float16,0,0.020878399908542632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,8,128,1,float16,fp8,0,0.059803199768066403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,2,128,1,fp8,fp8,0,0.02235199958086014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,4,128,1,float16,float16,0,0.021185599267482758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,4,128,1,float16,fp8,0,0.021244800090789794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,4,128,1,fp8,fp8,0,0.022867199778556824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,8,128,1,float16,float16,0,0.022836799919605254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,8,128,1,float16,fp8,0,0.020819200575351714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,8,128,1,fp8,fp8,0,0.022859199345111846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,48,128,1,float16,float16,0,0.018764799833297728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,48,128,1,float16,fp8,0,0.019118399918079378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,48,128,1,fp8,fp8,0,0.01879200041294098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,1,128,1,float16,float16,0,0.016763199865818024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,1,128,1,float16,fp8,0,0.01677920073270798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,1,128,1,fp8,fp8,0,0.01671839952468872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,2,128,1,float16,float16,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,2,128,1,float16,fp8,0,0.016752000153064727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,2,128,1,fp8,fp8,0,0.016654400527477263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,4,128,1,float16,fp8,0,0.01675039976835251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,4,128,1,fp8,fp8,0,0.016734400391578676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,8,128,1,float16,float16,0,0.016659200191497803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,8,128,1,float16,fp8,0,0.016655999422073364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,8,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,48,128,1,float16,float16,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,48,128,1,float16,fp8,0,0.014561599493026734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,48,128,1,fp8,fp8,0,0.014574399590492249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,1,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,1,128,1,float16,fp8,0,0.012518399953842163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,2,128,1,float16,fp8,0,0.022620800137519836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,2,128,1,float16,float16,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,2,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,2,128,1,fp8,fp8,0,0.012579199671745301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,4,128,1,float16,float16,0,0.012577599287033081
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,4,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,4,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,8,128,1,float16,float16,0,0.012544000148773193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,8,128,1,float16,fp8,0,0.012572799623012543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,8,128,1,fp8,fp8,0,0.012559999525547028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,48,128,1,float16,float16,0,0.012558400630950928
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,48,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,48,128,1,fp8,fp8,0,0.012636800110340119
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,4,128,1,float16,float16,0,0.01658879965543747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,1,128,1,float16,float16,0,0.012401600182056428
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,1,128,1,float16,fp8,0,0.01249919980764389
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,1,128,1,fp8,fp8,0,0.012398400157690049
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,2,128,1,float16,float16,0,0.01239520013332367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,2,128,1,float16,fp8,0,0.012479999661445617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,2,128,1,fp8,fp8,0,0.012412799894809723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,4,128,1,float16,float16,0,0.012432000041007996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,4,128,1,float16,fp8,0,0.012436799705028534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,4,128,1,fp8,fp8,0,0.012404800206422806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,8,128,1,float16,float16,0,0.011451199650764465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,8,128,1,float16,fp8,0,0.012443199753761292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,8,128,1,fp8,fp8,0,0.012377600371837615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,1,128,1,float16,float16,0,0.29677600860595704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,1,128,1,float16,fp8,0,0.30229599475860597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,1,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,1,128,1,fp8,fp8,0,0.3041055917739868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,2,128,1,float16,fp8,0,0.3047647953033447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,2,128,1,fp8,fp8,0,0.3016288042068481
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,4,128,1,float16,float16,0,0.3001919984817505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,4,128,1,float16,fp8,0,0.3043328046798706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,4,128,1,fp8,fp8,0,0.3015439987182617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,8,128,1,float16,float16,0,0.30976641178131104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,8,128,1,float16,fp8,0,0.30420799255371095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,48,128,1,float16,float16,0,0.20926239490509033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,48,128,1,float16,fp8,0,0.20241599082946776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,48,128,1,fp8,fp8,0,0.20108160972595215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,1,128,1,float16,float16,0,0.15615999698638916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,1,128,1,float16,fp8,0,0.15820319652557374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,1,128,1,fp8,fp8,0,0.15852160453796388
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,2,128,1,float16,fp8,0,0.1581663966178894
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,2,128,1,fp8,fp8,0,0.15818079710006713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,4,128,1,float16,float16,0,0.15830399990081787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,4,128,1,float16,fp8,0,0.15817919969558716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,2,128,1,float16,float16,0,0.29320321083068845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,8,128,1,float16,float16,0,0.16036640405654906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,8,128,1,float16,fp8,0,0.15589280128479005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,8,128,1,fp8,fp8,0,0.1561295986175537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,8,128,1,fp8,fp8,0,0.3003216028213501
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,48,128,1,float16,float16,0,0.10815999507904053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,48,128,1,float16,fp8,0,0.10495200157165527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,48,128,1,fp8,fp8,0,0.10497280359268188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,1,128,1,float16,float16,0,0.08242560029029847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,1,128,1,float16,fp8,0,0.0821344017982483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,2,128,1,float16,float16,0,0.1563472032546997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,1,128,1,fp8,fp8,0,0.08220000267028808
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,2,128,1,float16,float16,0,0.08454560041427613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,2,128,1,float16,fp8,0,0.08219360113143921
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,2,128,1,fp8,fp8,0,0.08414080142974853
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,4,128,1,float16,float16,0,0.08392639756202698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,4,128,1,float16,fp8,0,0.08420799970626831
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,4,128,1,fp8,fp8,0,0.08220639824867249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,8,128,1,float16,float16,0,0.0887503981590271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,8,128,1,float16,fp8,0,0.08219199776649475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,8,128,1,fp8,fp8,0,0.08458560109138488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,48,128,1,float16,float16,0,0.06018239855766296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,48,128,1,float16,fp8,0,0.05776960253715515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,48,128,1,fp8,fp8,0,0.057678401470184326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,1,128,1,float16,float16,0,0.04565120041370392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,1,128,1,float16,fp8,0,0.045347198843955994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,1,128,1,fp8,fp8,0,0.047286400198936464
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,2,128,1,float16,float16,0,0.04556320011615753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,2,128,1,fp8,fp8,0,0.04636319875717163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,4,128,1,float16,float16,0,0.045660799741744994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,4,128,1,float16,fp8,0,0.04635840058326721
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,4,128,1,fp8,fp8,0,0.04668639898300171
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,8,128,1,float16,float16,0,0.047310400009155276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,8,128,1,float16,fp8,0,0.04671039879322052
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,8,128,1,fp8,fp8,0,0.04651199877262115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,48,128,1,float16,float16,0,0.030921599268913268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,48,128,1,float16,fp8,0,0.032979199290275575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,48,128,1,fp8,fp8,0,0.03306719958782196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,1,128,1,float16,float16,0,0.027345600724220275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,1,128,1,float16,fp8,0,0.028859201073646545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,1,128,1,fp8,fp8,0,0.02885279953479767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,2,128,1,float16,float16,0,0.027753600478172304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,2,128,1,float16,fp8,0,0.028835201263427736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,2,128,1,fp8,fp8,0,0.02881920039653778
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,4,128,1,float16,float16,0,0.028911998867988585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,4,128,1,float16,fp8,0,0.0288783997297287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,4,128,1,fp8,fp8,0,0.0288783997297287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,8,128,1,float16,float16,0,0.028865599632263185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,8,128,1,float16,fp8,0,0.028881600499153136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,8,128,1,fp8,fp8,0,0.028896000981330872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,48,128,1,float16,float16,0,0.020652799308300017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,2,128,1,float16,fp8,0,0.04735519886016846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,48,128,1,float16,fp8,0,0.020708799362182617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,1,128,1,float16,float16,0,0.01863040030002594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,48,128,1,fp8,fp8,0,0.02080159932374954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,1,128,1,float16,fp8,0,0.018614399433135986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,1,128,1,fp8,fp8,0,0.018671999871730804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,2,128,1,float16,float16,0,0.01857919991016388
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,2,128,1,float16,fp8,0,0.01873600035905838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,2,128,1,fp8,fp8,0,0.018648000061511995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,4,128,1,float16,float16,0,0.018699200451374055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,4,128,1,float16,fp8,0,0.018622399866580965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,4,128,1,fp8,fp8,0,0.018638400733470915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,8,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,8,128,1,float16,fp8,0,0.018617600202560425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,8,128,1,fp8,fp8,0,0.018643200397491455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,48,128,1,float16,float16,0,0.01650879979133606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,48,128,1,float16,fp8,0,0.016579200327396394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,48,128,1,fp8,fp8,0,0.01656000018119812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,1,128,1,float16,fp8,0,0.014705599844455719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,1,128,1,fp8,fp8,0,0.014627200365066529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,2,128,1,float16,float16,0,0.014577600359916686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,2,128,1,float16,fp8,0,0.014588800072669984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,2,128,1,fp8,fp8,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,4,128,1,float16,float16,0,0.014627200365066529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,4,128,1,float16,fp8,0,0.014614400267601014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,4,128,1,fp8,fp8,0,0.014630399644374847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,8,128,1,float16,float16,0,0.014556799829006196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,8,128,1,float16,fp8,0,0.014697599411010741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,8,128,1,fp8,fp8,0,0.014616000652313232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,48,128,1,float16,float16,0,0.012600000202655792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,4,128,1,fp8,fp8,0,0.1582335948944092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,48,128,1,float16,fp8,0,0.0124208003282547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,48,128,1,fp8,fp8,0,0.01266240030527115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,1,128,1,float16,float16,0,0.010595200210809707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,1,128,1,float16,fp8,0,0.01247519999742508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,1,128,1,fp8,fp8,0,0.01165279969573021
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,2,128,1,float16,float16,0,0.012439999729394913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,2,128,1,float16,fp8,0,0.012035199999809265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,2,128,1,fp8,fp8,0,0.01247519999742508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,4,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,4,128,1,float16,fp8,0,0.012068799883127212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,4,128,1,fp8,fp8,0,0.011512000113725662
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,8,128,1,float16,float16,0,0.012089599668979645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,8,128,1,float16,fp8,0,0.012256000190973282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,8,128,1,fp8,fp8,0,0.012256000190973282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,48,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,48,128,1,float16,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,48,128,1,fp8,fp8,0,0.011860799789428712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,1,128,1,float16,float16,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,1,128,1,float16,fp8,0,0.010527999699115753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,1,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,2,128,1,float16,float16,0,0.010688000172376633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,2,128,1,float16,fp8,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,1,128,1,float16,float16,0,0.014547200500965118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,4,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,4,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,4,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,8,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,8,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,8,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,1,128,1,float16,float16,0,0.2546544075012207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,1,128,1,float16,fp8,0,0.2578687906265259
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,1,128,1,fp8,fp8,0,0.2573359966278076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,2,128,1,float16,float16,0,0.2547663927078247
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,2,128,1,float16,fp8,0,0.2569375991821289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,2,128,1,fp8,fp8,0,0.25782079696655275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,4,128,1,float16,float16,0,0.2562416076660156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,4,128,1,float16,fp8,0,0.25661280155181887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,4,128,1,fp8,fp8,0,0.256659197807312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,2,128,1,fp8,fp8,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,8,128,1,float16,float16,0,0.26067678928375243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,8,128,1,float16,fp8,0,0.2564768075942993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,48,128,1,float16,float16,0,0.15834720134735109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,8,128,1,fp8,fp8,0,0.2576368093490601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,48,128,1,float16,fp8,0,0.1547919988632202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,48,128,1,fp8,fp8,0,0.1555392026901245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,1,128,1,float16,float16,0,0.13589119911193848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,1,128,1,fp8,fp8,0,0.1315791964530945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,2,128,1,float16,fp8,0,0.13216480016708373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,2,128,1,fp8,fp8,0,0.13226079940795898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,4,128,1,float16,float16,0,0.13527519702911378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,4,128,1,float16,fp8,0,0.13284159898757936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,4,128,1,fp8,fp8,0,0.13249119520187377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,8,128,1,float16,float16,0,0.1371664047241211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,8,128,1,float16,fp8,0,0.1346127986907959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,8,128,1,fp8,fp8,0,0.131713604927063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,48,128,1,float16,float16,0,0.08611199855804444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,48,128,1,float16,fp8,0,0.08223999738693237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,48,128,1,fp8,fp8,0,0.08270879983901977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,1,128,1,float16,float16,0,0.07036640048027039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,1,128,1,float16,fp8,0,0.07194399833679199
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,1,128,1,fp8,fp8,0,0.070004802942276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,2,128,1,float16,float16,0,0.07130560278892517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,2,128,1,float16,fp8,0,0.07003520131111145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,2,128,1,fp8,fp8,0,0.07196320295333862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,1,128,1,float16,fp8,0,0.13159359693527223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,4,128,1,float16,fp8,0,0.07197279930114746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,4,128,1,fp8,fp8,0,0.06988959908485412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,8,128,1,float16,float16,0,0.07211999893188477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,8,128,1,float16,fp8,0,0.06992319822311402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,8,128,1,fp8,fp8,0,0.07196320295333862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,48,128,1,float16,float16,0,0.0435232013463974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,48,128,1,float16,fp8,0,0.04738720059394837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,48,128,1,fp8,fp8,0,0.045228800177574156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,1,128,1,float16,float16,0,0.04127680063247681
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,1,128,1,float16,fp8,0,0.03930079936981201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,2,128,1,float16,float16,0,0.03996480107307434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,1,128,1,fp8,fp8,0,0.041300800442695615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,2,128,1,float16,fp8,0,0.039243200421333314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,2,128,1,fp8,fp8,0,0.04112319946289063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,4,128,1,float16,float16,0,0.0412416011095047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,4,128,1,float16,fp8,0,0.04126879870891571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,4,128,1,fp8,fp8,0,0.041075199842453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,8,128,1,float16,float16,0,0.04132800102233887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,8,128,1,float16,fp8,0,0.040884798765182494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,8,128,1,fp8,fp8,0,0.041172799468040464
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,48,128,1,float16,float16,0,0.026894399523735048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,48,128,1,float16,fp8,0,0.028803199529647827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,48,128,1,fp8,fp8,0,0.026915198564529418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,1,128,1,float16,float16,0,0.024886399507522583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,1,128,1,float16,fp8,0,0.024804799258708952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,1,128,1,fp8,fp8,0,0.02489120066165924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,2,128,1,float16,float16,0,0.024851199984550477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,2,128,1,float16,fp8,0,0.02493920028209686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,4,128,1,float16,float16,0,0.02512960135936737
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,4,128,1,float16,fp8,0,0.024911999702453613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,4,128,1,fp8,fp8,0,0.02483679950237274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,8,128,1,float16,float16,0,0.026366400718688964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,8,128,1,float16,fp8,0,0.026387199759483337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,8,128,1,fp8,fp8,0,0.025596800446510314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,4,128,1,float16,float16,0,0.07109760046005249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,48,128,1,float16,float16,0,0.018671999871730804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,48,128,1,float16,fp8,0,0.018694399297237395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,2,128,1,float16,float16,0,0.13541760444641113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,48,128,1,fp8,fp8,0,0.018755200505256652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,1,128,1,float16,float16,0,0.016739200055599212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,1,128,1,float16,fp8,0,0.01676799952983856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,1,128,1,fp8,fp8,0,0.01675360053777695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,2,128,1,float16,float16,0,0.016702400147914888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,2,128,1,fp8,fp8,0,0.016739200055599212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,2,128,1,float16,fp8,0,0.01669120043516159
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,4,128,1,float16,float16,0,0.016752000153064727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,4,128,1,float16,fp8,0,0.016732800006866454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,4,128,1,fp8,fp8,0,0.016627199947834015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,8,128,1,float16,float16,0,0.016836799681186676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,8,128,1,float16,fp8,0,0.016641600430011748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,8,128,1,fp8,fp8,0,0.01672320067882538
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,48,128,1,float16,float16,0,0.014563199877738953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,48,128,1,float16,fp8,0,0.014496000111103058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,48,128,1,fp8,fp8,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,1,128,1,float16,float16,0,0.014070400595664978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,1,128,1,fp8,fp8,0,0.014507199823856353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,2,128,1,float16,float16,0,0.013737599551677703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,2,128,1,fp8,fp8,0,0.014563199877738953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,4,128,1,float16,float16,0,0.014185599982738495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,2,128,1,fp8,fp8,0,0.024828800559043886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,4,128,1,float16,fp8,0,0.01454399973154068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,4,128,1,fp8,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,8,128,1,float16,float16,0,0.014348800480365752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,8,128,1,float16,fp8,0,0.014553600549697876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,8,128,1,fp8,fp8,0,0.014454400539398194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,48,128,1,float16,float16,0,0.012486399710178375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,48,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,48,128,1,fp8,fp8,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,1,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,1,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,1,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,2,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,2,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,2,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,4,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,4,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,2,128,1,float16,fp8,0,0.014507199823856353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,4,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,8,128,1,float16,float16,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,8,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,8,128,1,fp8,fp8,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,48,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,48,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,1,128,1,float16,float16,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,1,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,1,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,2,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,2,128,1,float16,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,2,128,1,fp8,fp8,0,0.010654400289058685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,4,128,1,float16,float16,0,0.01064639985561371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,4,128,1,float16,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,4,128,1,fp8,fp8,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,8,128,1,float16,float16,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,8,128,1,float16,fp8,0,0.010583999752998351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,8,128,1,fp8,fp8,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,1,128,1,float16,float16,0,0.24814560413360595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,1,128,1,float16,fp8,0,0.0154448002576828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,1,128,1,float16,fp8,0,0.23579680919647217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,1,128,1,fp8,fp8,0,0.23614718914031982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,48,128,1,float16,float16,0,0.012198399752378464
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,2,128,1,float16,float16,0,0.24695839881896972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,2,128,1,float16,fp8,0,0.23623840808868407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,2,128,1,fp8,fp8,0,0.23377280235290526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,4,128,1,float16,float16,0,0.24815518856048585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,4,128,1,float16,fp8,0,0.23626399040222168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,4,128,1,fp8,fp8,0,0.23407359123229982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,8,128,1,float16,float16,0,0.25048160552978516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,8,128,1,fp8,fp8,0,0.2369744062423706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,48,128,1,float16,fp8,0,0.1335919976234436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,48,128,1,fp8,fp8,0,0.13511359691619873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,1,128,1,float16,float16,0,0.12874239683151245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,1,128,1,float16,fp8,0,0.1227120041847229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,1,128,1,fp8,fp8,0,0.12123199701309204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,2,128,1,float16,float16,0,0.127947199344635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,2,128,1,float16,fp8,0,0.12167520523071289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,2,128,1,fp8,fp8,0,0.12169920206069947
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,4,128,1,float16,float16,0,0.1286255955696106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,4,128,1,float16,fp8,0,0.12170720100402832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,4,128,1,fp8,fp8,0,0.12134079933166504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,8,128,1,float16,float16,0,0.12993119955062865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,8,128,1,float16,fp8,0,0.12125600576400757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,8,128,1,fp8,fp8,0,0.12226400375366211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,48,128,1,float16,float16,0,0.07439039945602417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,48,128,1,float16,fp8,0,0.07178559899330139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,48,128,1,fp8,fp8,0,0.0703216016292572
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,1,128,1,float16,float16,0,0.06975359916687011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,1,128,1,float16,fp8,0,0.06585599780082703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,1,128,1,fp8,fp8,0,0.06577119827270508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,2,128,1,float16,float16,0,0.06999040246009827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,2,128,1,float16,fp8,0,0.0658527970314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,2,128,1,fp8,fp8,0,0.06576799750328063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,4,128,1,float16,float16,0,0.07000799775123596
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,4,128,1,float16,fp8,0,0.0661087989807129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,4,128,1,fp8,fp8,0,0.06583520174026489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,8,128,1,float16,float16,0,0.0708191990852356
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,8,128,1,float16,fp8,0,0.06576640009880066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,8,128,1,fp8,fp8,0,0.06594079732894897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,8,128,1,float16,fp8,0,0.2357088088989258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,48,128,1,float16,fp8,0,0.04116320013999939
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,48,128,1,fp8,fp8,0,0.04145280122756958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,1,128,1,float16,fp8,0,0.03742560148239136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,1,128,1,fp8,fp8,0,0.037360000610351565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,2,128,1,float16,float16,0,0.03953759968280792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,2,128,1,float16,fp8,0,0.03733119964599609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,2,128,1,fp8,fp8,0,0.037462401390075686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,4,128,1,float16,float16,0,0.0395440012216568
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,4,128,1,float16,fp8,0,0.03741919994354248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,4,128,1,fp8,fp8,0,0.0375247985124588
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,8,128,1,float16,float16,0,0.0394896000623703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,8,128,1,float16,fp8,0,0.03729119896888733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,8,128,1,fp8,fp8,0,0.037273600697517395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,48,128,1,float16,float16,0,0.025526401400566102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,48,128,1,float16,fp8,0,0.025035199522972108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,48,128,1,float16,float16,0,0.04135839939117432
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,48,128,1,float16,float16,0,0.14221279621124266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,48,128,1,fp8,fp8,0,0.025036799907684325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,1,128,1,float16,float16,0,0.024732799828052522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,1,128,1,float16,fp8,0,0.02294880002737045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,2,128,1,float16,float16,0,0.024769599735736846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,1,128,1,float16,float16,0,0.039366400241851805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,2,128,1,float16,fp8,0,0.0235727995634079
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,2,128,1,fp8,fp8,0,0.02468319982290268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,4,128,1,float16,fp8,0,0.024711999297142028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,4,128,1,fp8,fp8,0,0.02481440007686615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,8,128,1,float16,float16,0,0.024899199604988098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,8,128,1,float16,fp8,0,0.024617600440979003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,8,128,1,fp8,fp8,0,0.02476480007171631
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,48,128,1,float16,float16,0,0.018297599256038667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,48,128,1,float16,fp8,0,0.016641600430011748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,48,128,1,fp8,fp8,0,0.01672320067882538
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,1,128,1,float16,float16,0,0.016579200327396394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,1,128,1,float16,fp8,0,0.016652800142765045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,1,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,2,128,1,float16,float16,0,0.016755199432373045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,2,128,1,float16,fp8,0,0.01653279960155487
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,2,128,1,fp8,fp8,0,0.016575999557971954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,1,128,1,fp8,fp8,0,0.02296479940414429
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,4,128,1,float16,float16,0,0.016680000722408293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,4,128,1,float16,float16,0,0.02479040026664734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,4,128,1,float16,fp8,0,0.016624000668525696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,4,128,1,fp8,fp8,0,0.01656640022993088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,8,128,1,float16,fp8,0,0.016599999368190767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,8,128,1,fp8,fp8,0,0.016582399606704712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,48,128,1,float16,float16,0,0.014553600549697876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,48,128,1,float16,fp8,0,0.013822400569915771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,48,128,1,fp8,fp8,0,0.014232000708580017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,1,128,1,float16,float16,0,0.014571200311183929
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,1,128,1,float16,fp8,0,0.012617599964141846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,1,128,1,fp8,fp8,0,0.013660800457000733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,2,128,1,float16,float16,0,0.012889599800109864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,2,128,1,float16,fp8,0,0.01374559998512268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,2,128,1,fp8,fp8,0,0.012664000689983367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,4,128,1,float16,fp8,0,0.012555199861526489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,4,128,1,fp8,fp8,0,0.012622399628162384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,8,128,1,float16,float16,0,0.012534399330615998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,8,128,1,float16,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,8,128,1,fp8,fp8,0,0.012559999525547028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,48,128,1,float16,float16,0,0.012256000190973282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,48,128,1,float16,fp8,0,0.010843200236558914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,48,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,1,128,1,float16,float16,0,0.01170400008559227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,1,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,1,128,1,fp8,fp8,0,0.01080000028014183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,2,128,1,float16,fp8,0,0.01074720025062561
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,2,128,1,fp8,fp8,0,0.011340799927711486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,4,128,1,float16,float16,0,0.010734400153160096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,4,128,1,float16,fp8,0,0.010683199763298035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,8,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,8,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,8,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,8,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,48,128,1,float16,float16,0,0.011785600334405899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,48,128,1,float16,fp8,0,0.010364799946546554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,48,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,4,128,1,float16,float16,0,0.014448000490665436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,1,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,1,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,2,128,1,float16,float16,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,2,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,2,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,4,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,4,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,4,128,1,fp8,fp8,0,0.010311999917030334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,8,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,8,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,2,128,1,float16,float16,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,8,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,1,128,1,float16,float16,0,0.24079680442810059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,1,128,1,float16,fp8,0,0.22535839080810546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,1,128,1,fp8,fp8,0,0.22405118942260743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,2,128,1,float16,float16,0,0.24278879165649414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,4,128,1,fp8,fp8,0,0.010651200264692306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,2,128,1,float16,fp8,0,0.22370560169219972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,2,128,1,fp8,fp8,0,0.22554559707641603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,4,128,1,float16,float16,0,0.24284639358520507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,4,128,1,float16,fp8,0,0.2238879919052124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,4,128,1,fp8,fp8,0,0.22576639652252198
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,8,128,1,float16,float16,0,0.24266560077667237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,8,128,1,float16,fp8,0,0.22403359413146973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,1,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,48,128,1,fp8,fp8,0,0.11714400053024292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,0,0.11702560186386109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,8,128,1,fp8,fp8,0,0.22590239048004152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,1,128,1,float16,float16,0,0.12523200511932372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,1,128,1,float16,fp8,0,0.11705759763717652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,1,128,1,fp8,fp8,0,0.11730079650878907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,2,128,1,float16,float16,0,0.1254256010055542
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,2,128,1,float16,fp8,0,0.11699039936065674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,2,128,1,fp8,fp8,0,0.11735999584197998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,4,128,1,float16,float16,0,0.1253056049346924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,4,128,1,fp8,fp8,0,0.11715199947357177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,4,128,1,float16,fp8,0,0.11722240447998047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,8,128,1,float16,fp8,0,0.11721600294113159
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,8,128,1,fp8,fp8,0,0.11712960004806519
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,0,0.06994400024414063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,0,0.0637440025806427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,48,128,1,fp8,fp8,0,0.06376320123672485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,1,128,1,float16,float16,0,0.06793760061264038
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,1,128,1,float16,fp8,0,0.0637615978717804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,1,128,1,fp8,fp8,0,0.06376000046730042
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,2,128,1,float16,float16,0,0.06793280243873596
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,2,128,1,float16,fp8,0,0.06383519768714904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,2,128,1,fp8,fp8,0,0.06383039951324462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,4,128,1,float16,float16,0,0.06803680062294007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,4,128,1,float16,fp8,0,0.06374559998512268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,4,128,1,fp8,fp8,0,0.06379839777946472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,8,128,1,float16,float16,0,0.06787999868392944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,0,0.1276095986366272
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,8,128,1,fp8,fp8,0,0.06387680172920226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,0,0.04032799899578095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,0,0.03727520108222961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,8,128,1,float16,float16,0,0.12517280578613282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,1,128,1,float16,float16,0,0.03919360041618347
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,1,128,1,float16,fp8,0,0.03707840144634247
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,1,128,1,fp8,fp8,0,0.03710399866104126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,2,128,1,float16,float16,0,0.039190399646759036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,2,128,1,float16,fp8,0,0.03718560039997101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,2,128,1,fp8,fp8,0,0.03708640038967133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,4,128,1,float16,float16,0,0.03922240138053894
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,4,128,1,float16,fp8,0,0.03710399866104126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,4,128,1,fp8,fp8,0,0.03704319894313812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,8,128,1,float16,float16,0,0.039156800508499144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,8,128,1,float16,fp8,0,0.0371071994304657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,8,128,1,fp8,fp8,0,0.037084800004959104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,0,0.024879999458789825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,8,128,1,float16,fp8,0,0.06371520161628723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,0,0.022808000445365906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,48,128,1,fp8,fp8,0,0.02285120040178299
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,1,128,1,float16,float16,0,0.024777600169181825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,1,128,1,float16,fp8,0,0.022809599339962006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,1,128,1,fp8,fp8,0,0.02290399968624115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,2,128,1,float16,float16,0,0.02492160052061081
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,48,128,1,fp8,fp8,0,0.03722400069236755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,2,128,1,fp8,fp8,0,0.022788800299167633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,4,128,1,float16,float16,0,0.02489439994096756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,4,128,1,float16,fp8,0,0.02282560020685196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,4,128,1,fp8,fp8,0,0.02393440008163452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,8,128,1,float16,float16,0,0.0248416006565094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,8,128,1,float16,fp8,0,0.02282560020685196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,0,0.01754560023546219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,0,0.016601599752902985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,48,128,1,fp8,fp8,0,0.016631999611854555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,1,128,1,float16,float16,0,0.01664479970932007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,1,128,1,float16,fp8,0,0.01660960018634796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,1,128,1,fp8,fp8,0,0.016550399363040924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,2,128,1,float16,float16,0,0.01669919937849045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,2,128,1,fp8,fp8,0,0.016622400283813475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,4,128,1,float16,float16,0,0.016655999422073364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,4,128,1,float16,fp8,0,0.01573760062456131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,4,128,1,fp8,fp8,0,0.015800000727176668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,8,128,1,float16,float16,0,0.016628800332546233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,8,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,8,128,1,fp8,fp8,0,0.016625599563121797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,0,0.014567999541759491
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,0,0.012544000148773193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,48,128,1,fp8,fp8,0,0.01255040019750595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,1,128,1,float16,float16,0,0.012558400630950928
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,2,128,1,float16,fp8,0,0.02274720072746277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,1,128,1,float16,fp8,0,0.012620800733566284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,2,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,2,128,1,float16,fp8,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,2,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,8,128,1,fp8,fp8,0,0.02284960001707077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,4,128,1,float16,float16,0,0.012587200105190276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,4,128,1,float16,fp8,0,0.012486399710178375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,8,128,1,float16,float16,0,0.014396800100803376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,8,128,1,float16,fp8,0,0.01263359934091568
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,8,128,1,fp8,fp8,0,0.01266079992055893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,2,128,1,float16,fp8,0,0.016598400473594666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,0,0.012604799866676331
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,48,128,1,fp8,fp8,0,0.010622400045394897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,1,128,1,float16,float16,0,0.010623999685049058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,1,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,1,128,1,fp8,fp8,0,0.010567999631166457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,2,128,1,float16,float16,0,0.011004800349473954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,2,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,2,128,1,fp8,fp8,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,4,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,4,128,1,float16,float16,0,0.010782399773597717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,4,128,1,fp8,fp8,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,8,128,1,float16,float16,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,8,128,1,float16,fp8,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,8,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,48,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,1,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,1,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,2,128,1,float16,float16,0,0.010639999806880952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,2,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,1,128,1,fp8,fp8,0,0.012539200484752655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,2,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,4,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,4,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,8,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,8,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,4,128,1,fp8,fp8,0,0.012547199428081513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,1,128,1,float16,fp8,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,4,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,8,128,1,fp8,fp8,0,0.010320000350475311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,fp8,0,15.133216857910156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,1,128,1,fp8,fp8,0,15.099526977539062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,float16,0,22.642367553710937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,float16,0,22.75200500488281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,fp8,0,16.524765014648438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,2,128,1,fp8,fp8,0,16.783981323242188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,float16,0,24.705482482910156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,fp8,0,15.017581176757812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,4,128,1,fp8,fp8,0,16.117765808105467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,float16,0,27.28954162597656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,fp8,0,15.242570495605468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,8,128,1,fp8,fp8,0,14.998054504394531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,40,128,1,fp8,fp8,0,7.332233428955078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,fp8,0,7.953221130371094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,float16,0,12.552324676513672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,fp8,0,6.877582550048828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,1,128,1,fp8,fp8,0,7.393256378173828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,float16,0,12.169152069091798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,fp8,0,7.0929443359375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,2,128,1,fp8,fp8,0,7.8610481262207035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,fp8,0,7.749409484863281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,4,128,1,fp8,fp8,0,8.585282897949218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,float16,0,12.049009704589844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,8,128,1,fp8,fp8,0,7.56866226196289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,fp8,0,8.104625701904297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,float16,0,12.8172119140625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,fp8,0,3.725726318359375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,40,128,1,fp8,fp8,0,3.6858943939208983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,fp8,0,3.603598403930664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,float16,0,5.7578895568847654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,1,128,1,fp8,fp8,0,3.533273696899414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,float16,0,6.238328170776367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,fp8,0,3.585958480834961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,2,128,1,fp8,fp8,0,3.845804977416992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,float16,0,5.48231201171875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,fp8,0,3.583652877807617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,4,128,1,fp8,fp8,0,3.5744449615478517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,float16,0,5.4797710418701175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,fp8,0,3.7765342712402346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,8,128,1,fp8,fp8,0,3.6867374420166015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,fp8,0,1.8501007080078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,40,128,1,fp8,fp8,0,1.82117919921875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,float16,0,2.02020320892334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,fp8,0,2.1469295501708983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,1,128,1,fp8,fp8,0,1.9829824447631836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,float16,0,1.9953727722167969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,fp8,0,2.0411199569702148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,2,128,1,fp8,fp8,0,1.876963233947754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,float16,0,2.016009521484375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,fp8,0,2.470822334289551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,4,128,1,fp8,fp8,0,1.7579248428344727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,float16,0,2.0069759368896483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,fp8,0,2.6255712509155273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,8,128,1,fp8,fp8,0,1.7968832015991212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,float16,0,14.432826232910156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,fp8,0,9.009292602539062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,1,128,1,fp8,fp8,0,8.634406280517577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,float16,0,15.140185546875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,fp8,0,8.74261245727539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,float16,0,2.203995132446289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,2,128,1,fp8,fp8,0,9.17129135131836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,float16,0,5.370612716674804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,float16,0,12.676229095458984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,float16,0,13.109176635742188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,4,128,1,fp8,fp8,0,8.016734313964843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,fp8,0,9.541580963134766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,fp8,0,8.214777374267578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,float16,0,7.4985298156738285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,float16,0,14.923246765136719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,8,128,1,fp8,fp8,0,8.014807891845702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,fp8,0,4.439707183837891
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,fp8,0,4.200775909423828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,40,128,1,fp8,fp8,0,4.703156661987305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,float16,0,7.4175537109375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,1,128,1,fp8,fp8,0,4.17309455871582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,fp8,0,4.662182235717774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,float16,0,6.724195098876953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,2,128,1,fp8,fp8,0,4.498750305175781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,float16,0,6.742014312744141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,fp8,0,4.51257438659668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,4,128,1,fp8,fp8,0,4.2234752655029295
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,fp8,0,4.095334243774414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,float16,0,7.060842895507813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,float16,0,2.992483139038086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,8,128,1,fp8,fp8,0,4.6640174865722654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,fp8,0,2.1156591415405273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,40,128,1,fp8,fp8,0,2.420017623901367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,fp8,0,2.109884834289551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,float16,0,3.4187793731689453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,1,128,1,fp8,fp8,0,1.9744560241699218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,fp8,0,2.116961669921875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,float16,0,3.5123390197753905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,2,128,1,fp8,fp8,0,2.022553634643555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,float16,0,2.8263919830322264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,fp8,0,2.3225423812866213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,4,128,1,fp8,fp8,0,2.3563295364379884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,float16,0,2.795827293395996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,fp8,0,2.135121536254883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,8,128,1,fp8,fp8,0,2.166812705993652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,float16,0,1.4947775840759276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,fp8,0,1.726780891418457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,40,128,1,fp8,fp8,0,1.1823056221008301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,float16,0,1.694615936279297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,fp8,0,1.0584400177001954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,1,128,1,fp8,fp8,0,1.0478704452514649
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,fp8,0,1.0386591911315919
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,float16,0,1.7731407165527344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,float16,0,1.1862256050109863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,fp8,0,1.2184432029724122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,4,128,1,fp8,fp8,0,1.0608271598815917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,float16,0,1.1792559623718262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,fp8,0,1.0525903701782227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,2,128,1,fp8,fp8,0,1.0548912048339845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,8,128,1,fp8,fp8,0,1.026193618774414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,fp8,0,5.739643096923828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,1,128,1,fp8,fp8,0,5.990660858154297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,float16,0,9.141092681884766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,float16,0,10.560399627685547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,fp8,0,5.712308883666992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,2,128,1,fp8,fp8,0,6.228200149536133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,fp8,0,6.437030029296875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,4,128,1,fp8,fp8,0,5.809548950195312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,float16,0,9.316180419921874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,float16,0,10.025233459472656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,fp8,0,6.749790191650391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,float16,0,5.135062408447266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,8,128,1,fp8,fp8,0,6.15473747253418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,40,128,1,fp8,fp8,0,3.264215850830078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,fp8,0,3.7316734313964846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,fp8,0,2.896790313720703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,float16,0,4.944499206542969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,1,128,1,fp8,fp8,0,3.1019439697265625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,fp8,0,2.8835935592651367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,float16,0,5.366268920898437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,2,128,1,fp8,fp8,0,2.8126384735107424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,fp8,0,2.8006208419799803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,float16,0,5.081187057495117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,4,128,1,fp8,fp8,0,3.278062438964844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,fp8,0,2.8863199234008787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,float16,0,4.7713886260986325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,8,128,1,fp8,fp8,0,3.2185600280761717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,float16,0,1.9564832687377929
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,40,128,1,fp8,fp8,0,1.855740737915039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,fp8,0,1.493454360961914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,float16,0,2.0492992401123047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,1,128,1,fp8,fp8,0,1.4077743530273437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,float16,0,1.6475040435791015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,fp8,0,1.5062239646911622
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,fp8,0,1.7413904190063476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,2,128,1,fp8,fp8,0,1.400054359436035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,fp8,0,1.416755199432373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,float16,0,1.8212112426757812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,4,128,1,fp8,fp8,0,1.4468031883239747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,float16,0,1.7064191818237304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,fp8,0,1.415067195892334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,8,128,1,fp8,fp8,0,1.8664800643920898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,fp8,0,0.8771583557128906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,40,128,1,fp8,fp8,0,0.815329647064209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,float16,0,0.9329263687133789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,fp8,0,0.7724016189575196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,1,128,1,fp8,fp8,0,0.7631343841552735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,fp8,0,0.7463056087493897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,float16,0,0.9851887702941895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,float16,0,0.9194784164428711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,float16,0,0.8425200462341309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,fp8,0,1.1722559928894043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,4,128,1,fp8,fp8,0,0.7510752201080322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,float16,0,0.8985615730285644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,fp8,0,0.8502767562866211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,2,128,1,fp8,fp8,0,0.7595856189727783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,8,128,1,fp8,fp8,0,0.7501584053039551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,fp8,0,8.78377456665039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,1,128,1,fp8,fp8,0,7.449832153320313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,float16,0,12.636593627929688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,float16,0,12.587484741210938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,2,128,1,fp8,fp8,0,7.504170989990234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,fp8,0,8.448004913330077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,fp8,0,8.079769897460938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,4,128,1,fp8,fp8,0,7.68392333984375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,float16,0,13.224342346191406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,float16,0,15.206367492675781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,float16,0,6.926526641845703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,fp8,0,9.279643249511718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,8,128,1,fp8,fp8,0,8.433662414550781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,fp8,0,4.431326293945313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,40,128,1,fp8,fp8,0,4.056399917602539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,fp8,0,3.8848945617675783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,float16,0,6.894500732421875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,1,128,1,fp8,fp8,0,4.259888076782227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,fp8,0,3.9157745361328127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,float16,0,6.3495136260986325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,2,128,1,fp8,fp8,0,4.139961624145508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,fp8,0,3.750680160522461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,4,128,1,fp8,fp8,0,3.7753040313720705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,fp8,0,3.807447814941406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,float16,0,6.295584106445313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,float16,0,6.631854248046875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,8,128,1,fp8,fp8,0,3.969696044921875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,float16,0,2.2627391815185547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,fp8,0,2.3021135330200195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,40,128,1,fp8,fp8,0,1.9550703048706055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,fp8,0,1.9569887161254882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,1,128,1,fp8,fp8,0,1.8311487197875977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,float16,0,3.2548702239990233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,fp8,0,1.9833360671997071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,float16,0,2.8659887313842773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,2,128,1,fp8,fp8,0,2.3864160537719727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,float16,0,2.1494768142700194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,fp8,0,1.798851203918457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,4,128,1,fp8,fp8,0,2.1236719131469726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,float16,0,2.097545623779297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,float16,0,1.3225711822509765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,8,128,1,fp8,fp8,0,1.8043760299682616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,fp8,0,1.4028032302856446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,fp8,0,2.3391008377075195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,float16,0,1.0550640106201172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,40,128,1,fp8,fp8,0,1.4174768447875976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,1,128,1,fp8,fp8,0,1.029843235015869
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,fp8,0,0.9884431838989258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,2,128,1,fp8,fp8,0,0.9652223587036133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,float16,0,1.0754639625549316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,fp8,0,0.9535984039306641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,4,128,1,fp8,fp8,0,1.0798224449157714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,float16,0,1.0964495658874511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,fp8,0,1.038468837738037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,float16,0,0.6309008121490478
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,fp8,0,0.9527935981750488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,8,128,1,fp8,fp8,0,0.9684191703796386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,float16,0,1.1942095756530762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,fp8,0,0.5570799827575683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,40,128,1,fp8,fp8,0,0.568065595626831
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,float16,0,0.6048687934875489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,fp8,0,0.5127679824829101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,1,128,1,fp8,fp8,0,0.5213568210601807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,float16,0,0.5791664123535156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,fp8,0,0.5626783847808838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,2,128,1,fp8,fp8,0,0.5127088069915772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,fp8,0,0.5173583984375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,4,128,1,fp8,fp8,0,0.6118959903717041
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,float16,0,0.5797344207763672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,fp8,0,0.5269008159637452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,8,128,1,fp8,fp8,0,0.5212495803833008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,float16,0,0.5887296199798584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,fp8,0,4.712745666503906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,1,128,1,fp8,fp8,0,4.548977661132812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,float16,0,6.5684959411621096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,float16,0,6.339086532592773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,fp8,0,4.790983963012695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,2,128,1,fp8,fp8,0,4.86614875793457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,float16,0,6.975094604492187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,fp8,0,4.370430374145508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,4,128,1,fp8,fp8,0,4.809648132324218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,float16,0,7.546364593505859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,fp8,0,4.364398574829101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,float16,0,4.059286499023438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,8,128,1,fp8,fp8,0,4.328420639038086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,40,128,1,fp8,fp8,0,2.3745695114135743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,fp8,0,2.768367958068848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,fp8,0,2.2571088790893556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,float16,0,3.9015296936035155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,1,128,1,fp8,fp8,0,2.2923887252807615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,float16,0,2.5394464492797852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,fp8,0,2.163585662841797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,2,128,1,fp8,fp8,0,2.561614418029785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,4,128,1,fp8,fp8,0,2.229580879211426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,float16,0,3.4456863403320312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,fp8,0,2.874372863769531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,fp8,0,2.285438346862793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,float16,0,1.6668319702148438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,float16,0,3.349049758911133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,8,128,1,fp8,fp8,0,2.3409536361694334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,fp8,0,1.2092415809631347
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,fp8,0,1.1395119667053222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,40,128,1,fp8,fp8,0,1.7201536178588868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,float16,0,1.8984464645385741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,1,128,1,fp8,fp8,0,1.1078720092773438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,float16,0,1.2038880348205567
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,fp8,0,1.2361680030822755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,2,128,1,fp8,fp8,0,1.3117728233337402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,float16,0,1.2690719604492187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,4,128,1,fp8,fp8,0,1.1164735794067382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,fp8,0,1.3647151947021485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,float16,0,1.2662704467773438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,fp8,0,1.17260799407959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,float16,0,0.7051023960113525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,fp8,0,0.6757215976715087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,8,128,1,fp8,fp8,0,1.1421919822692872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,float16,0,0.6404160022735595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,fp8,0,0.6602911949157715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,float16,0,0.6464000225067139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,fp8,0,0.5808176040649414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,2,128,1,fp8,fp8,0,0.5863615989685058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,float16,0,0.6486512184143066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,fp8,0,0.5769328117370606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,4,128,1,fp8,fp8,0,0.5848544120788575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,float16,0,0.6502416133880615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,40,128,1,fp8,fp8,0,0.6436079978942871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,float16,0,0.3953392028808594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,8,128,1,fp8,fp8,0,0.5782832145690918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,1,128,1,fp8,fp8,0,0.5967599868774414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,fp8,0,0.34860479831695557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,40,128,1,fp8,fp8,0,0.3513823986053467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,float16,0,0.353600001335144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,fp8,0,0.3241152048110962
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,1,128,1,fp8,fp8,0,0.31964640617370604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,float16,0,0.36306400299072267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,fp8,0,0.3198911905288696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,2,128,1,fp8,fp8,0,0.3235424041748047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,float16,0,0.3562160015106201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,fp8,0,0.3232399940490723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,fp8,0,0.6388304233551025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,4,128,1,fp8,fp8,0,0.3194272041320801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,float16,0,0.3689296007156372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,fp8,0,0.32148959636688235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,8,128,1,fp8,fp8,0,0.3192784070968628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,1,128,1,fp8,fp8,0,4.144504165649414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,fp8,0,4.366968154907227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,float16,0,6.088569641113281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,fp8,0,4.070924758911133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,2,128,1,fp8,fp8,0,4.266228866577149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,float16,0,6.048086547851563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,float16,0,6.766859436035157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,fp8,0,4.242835235595703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,4,128,1,fp8,fp8,0,4.401704025268555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,float16,0,6.445811462402344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,float16,0,3.460593414306641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,fp8,0,4.592132949829102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,8,128,1,fp8,fp8,0,4.1800895690917965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,fp8,0,2.3396976470947264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,fp8,0,2.09161434173584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,40,128,1,fp8,fp8,0,2.480009651184082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,float16,0,2.368230438232422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,1,128,1,fp8,fp8,0,2.0558767318725586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,fp8,0,2.0703807830810548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,float16,0,2.4659183502197264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,2,128,1,fp8,fp8,0,2.962816047668457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,float16,0,2.4047567367553713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,fp8,0,2.4269439697265627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,4,128,1,fp8,fp8,0,2.2963647842407227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,fp8,0,2.0639328002929687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,float16,0,2.6883167266845702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,float16,0,1.2991935729980468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,8,128,1,fp8,fp8,0,2.306083106994629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,fp8,0,1.2596240043640137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,float16,0,1.1692831993103028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,40,128,1,fp8,fp8,0,1.7348831176757813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,fp8,0,1.0768320083618164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,1,128,1,fp8,fp8,0,1.077723217010498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,fp8,0,1.0517840385437012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,2,128,1,fp8,fp8,0,1.0989680290222168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,float16,0,1.5807056427001953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,float16,0,1.1536831855773926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,fp8,0,1.055065631866455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,4,128,1,fp8,fp8,0,1.0483200073242187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,fp8,0,1.3080415725708008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,float16,0,1.6120927810668946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,float16,0,0.6984367847442627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,8,128,1,fp8,fp8,0,1.0559344291687012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,fp8,0,0.6326704025268555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,float16,0,0.6669951915740967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,fp8,0,0.5579855918884278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,40,128,1,fp8,fp8,0,1.0345439910888672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,1,128,1,fp8,fp8,0,0.5602287769317627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,float16,0,0.7318960189819336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,fp8,0,0.5544928073883056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,2,128,1,fp8,fp8,0,0.5551087856292725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,float16,0,0.6094367980957032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,fp8,0,0.6729392051696778
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,4,128,1,fp8,fp8,0,0.5545631885528565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,float16,0,0.6045711994171142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,float16,0,0.37865920066833497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,8,128,1,fp8,fp8,0,0.5696800231933594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,fp8,0,0.33148798942565916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,40,128,1,fp8,fp8,0,0.3365679979324341
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,float16,0,0.3269295930862427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,fp8,0,0.29701759815216067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,1,128,1,fp8,fp8,0,0.30141921043395997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,float16,0,0.3265647888183594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,fp8,0,0.2976111888885498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,2,128,1,fp8,fp8,0,0.3017359972000122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,float16,0,0.3280816078186035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,fp8,0,0.29759039878845217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,4,128,1,fp8,fp8,0,0.30104639530181887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,float16,0,0.3336575984954834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,fp8,0,0.29700798988342286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,float16,0,0.21005918979644775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,8,128,1,fp8,fp8,0,0.3003119945526123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,fp8,0,0.18827680349349976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,40,128,1,fp8,fp8,0,0.18953440189361573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,float16,0,0.18518879413604736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,1,128,1,fp8,fp8,0,0.16862399578094484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,float16,0,0.1850864052772522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,fp8,0,0.16859840154647826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,2,128,1,fp8,fp8,0,0.1702288031578064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,float16,0,0.1849568009376526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,fp8,0,0.17065600156784058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,fp8,0,0.5551360130310059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,4,128,1,fp8,fp8,0,0.16832959651947021
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,float16,0,0.18987040519714354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,fp8,0,0.17246240377426147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,8,128,1,fp8,fp8,0,0.1708799958229065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,fp8,0,0.16832959651947021
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,fp8,0,2.4766016006469727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,float16,0,2.923583984375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,1,128,1,fp8,fp8,0,2.452249526977539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,fp8,0,2.506153678894043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,2,128,1,fp8,fp8,0,2.449924850463867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,fp8,0,2.4649919509887694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,float16,0,3.1542192459106446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,float16,0,2.916744041442871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,4,128,1,fp8,fp8,0,2.464916801452637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,float16,0,1.597486400604248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,8,128,1,fp8,fp8,0,2.505241584777832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,fp8,0,2.9216976165771484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,float16,0,3.6824417114257812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,float16,0,1.362769603729248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,40,128,1,fp8,fp8,0,1.5974703788757325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,fp8,0,1.2651552200317382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,1,128,1,fp8,fp8,0,1.373459243774414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,fp8,0,1.2500783920288085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,2,128,1,fp8,fp8,0,1.2606415748596191
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,float16,0,1.3997039794921875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,fp8,0,1.691147232055664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,fp8,0,1.258576011657715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,4,128,1,fp8,fp8,0,1.3659567832946777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,float16,0,1.349948787689209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,float16,0,1.362508773803711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,fp8,0,1.2564751625061035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,float16,0,0.867131233215332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,8,128,1,fp8,fp8,0,1.2488207817077637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,40,128,1,fp8,fp8,0,0.7554719924926758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,fp8,0,0.6521039962768554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,float16,0,0.8005056381225586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,1,128,1,fp8,fp8,0,0.653704023361206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,float16,0,0.6929728031158447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,fp8,0,0.7749743938446045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,2,128,1,fp8,fp8,0,0.6523007869720459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,float16,0,0.7817535877227784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,fp8,0,0.6447231769561768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,4,128,1,fp8,fp8,0,0.650387191772461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,fp8,0,0.6424863815307618
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,float16,0,0.9847871780395507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,fp8,0,0.756708812713623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,float16,0,0.4338399887084961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,40,128,1,fp8,fp8,0,0.3996687889099121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,fp8,0,0.4661344051361084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,float16,0,0.3698944091796875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,fp8,0,0.35030241012573243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,float16,0,0.37007200717926025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,fp8,0,0.3490799903869629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,2,128,1,fp8,fp8,0,0.3573647975921631
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,float16,0,0.3805824041366577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,fp8,0,0.3570480108261108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,4,128,1,fp8,fp8,0,0.3475104093551636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,float16,0,0.37940800189971924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,fp8,0,0.34659039974212646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,8,128,1,fp8,fp8,0,0.3412767887115479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,8,128,1,fp8,fp8,0,0.6496799945831299
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,fp8,0,0.21696319580078124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,40,128,1,fp8,fp8,0,0.21995038986206056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,float16,0,0.20443520545959473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,fp8,0,0.19120320081710815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,1,128,1,fp8,fp8,0,0.188646399974823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,float16,0,0.2091088056564331
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,fp8,0,0.18889119625091552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,2,128,1,fp8,fp8,0,0.19165439605712892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,float16,0,0.20595200061798097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,fp8,0,0.19141919612884523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,4,128,1,fp8,fp8,0,0.18884960412979127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,float16,0,0.21190879344940186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,fp8,0,0.18924479484558104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,8,128,1,fp8,fp8,0,0.19155999422073364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,float16,0,0.13573440313339233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,fp8,0,0.1271936058998108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,40,128,1,fp8,fp8,0,0.12535680532455445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,float16,0,0.1187999963760376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,fp8,0,0.11038399934768676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,1,128,1,fp8,fp8,0,0.1111680030822754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,float16,0,0.11701279878616333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,float16,0,0.24156320095062256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,fp8,0,0.11125600337982178
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,2,128,1,fp8,fp8,0,0.11011040210723877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,float16,0,0.12134079933166504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,fp8,0,0.11184799671173096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,4,128,1,fp8,fp8,0,0.10981760025024415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,float16,0,0.1235584020614624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,fp8,0,0.1111680030822754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,8,128,1,fp8,fp8,0,0.11013280153274536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,1,128,1,fp8,fp8,0,0.3753567934036255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,float16,0,2.6308591842651365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,fp8,0,2.5333152770996095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,float16,0,3.0755552291870116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,2,128,1,fp8,fp8,0,2.5251760482788086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,fp8,0,2.5919120788574217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,1,128,1,fp8,fp8,0,2.503950309753418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,float16,0,3.7292991638183595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,fp8,0,2.5225215911865235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,4,128,1,fp8,fp8,0,2.5771360397338867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,float16,0,3.1877344131469725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,fp8,0,2.6344879150390623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,8,128,1,fp8,fp8,0,2.519046401977539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,float16,0,1.8397808074951172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,fp8,0,1.5564047813415527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,40,128,1,fp8,fp8,0,1.562814426422119
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,float16,0,1.638315200805664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,fp8,0,1.2812288284301758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,1,128,1,fp8,fp8,0,1.2829360008239745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,fp8,0,1.2751215934753417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,float16,0,1.5680912017822266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,2,128,1,fp8,fp8,0,1.2809247970581055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,fp8,0,1.2734687805175782
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,float16,0,1.357646369934082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,4,128,1,fp8,fp8,0,1.3733391761779785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,float16,0,1.3779088020324708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,fp8,0,1.2836432456970215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,float16,0,0.8613823890686035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,fp8,0,0.9108495712280273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,8,128,1,fp8,fp8,0,1.4047535896301269
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,40,128,1,fp8,fp8,0,0.8190799713134765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,float16,0,0.6735328197479248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,1,128,1,fp8,fp8,0,0.6987520217895508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,float16,0,0.6790128231048584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,fp8,0,0.7237648010253906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,2,128,1,fp8,fp8,0,0.6569759845733643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,float16,0,0.7177984237670898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,fp8,0,0.6883935928344727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,4,128,1,fp8,fp8,0,0.6548719882965088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,float16,0,0.763976001739502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,fp8,0,0.6486015796661377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,8,128,1,fp8,fp8,0,0.6523680210113525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,fp8,0,0.41330881118774415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,float16,0,0.5177231788635254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,40,128,1,fp8,fp8,0,0.4162543773651123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,float16,0,0.3573904037475586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,fp8,0,0.6610496044158936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,1,128,1,fp8,fp8,0,0.3471168041229248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,float16,0,0.35559680461883547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,fp8,0,0.34618558883666994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,2,128,1,fp8,fp8,0,0.34778079986572263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,float16,0,0.36074559688568114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,fp8,0,0.34658401012420653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,4,128,1,fp8,fp8,0,0.34547359943389894
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,float16,0,0.3703871965408325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,fp8,0,0.345467209815979
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,float16,0,0.23976640701293944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,fp8,0,0.22449920177459717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,40,128,1,fp8,fp8,0,0.22255680561065674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,fp8,0,0.18505760431289672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,1,128,1,fp8,fp8,0,0.18534400463104247
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,float16,0,0.19446879625320435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,fp8,0,0.18504320383071898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,fp8,0,0.4345695972442627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,2,128,1,fp8,fp8,0,0.18507039546966553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,float16,0,0.19893920421600342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,fp8,0,0.1847615957260132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,4,128,1,fp8,fp8,0,0.18600319623947142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,fp8,0,0.18690719604492187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,8,128,1,fp8,fp8,0,0.1849760055541992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,float16,0,0.1344256043434143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,fp8,0,0.12446080446243286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,8,128,1,fp8,fp8,0,0.34460160732269285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,40,128,1,fp8,fp8,0,0.12477279901504516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,float16,0,0.1108896017074585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,float16,0,0.19856640100479125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,fp8,0,0.10324959754943848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,1,128,1,fp8,fp8,0,0.10554239749908448
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,fp8,0,0.10379519462585449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,2,128,1,fp8,fp8,0,0.1054960012435913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,fp8,0,0.10580159425735473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,4,128,1,fp8,fp8,0,0.1053056001663208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,float16,0,0.11613600254058838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,fp8,0,0.10503360033035278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,8,128,1,fp8,fp8,0,0.10508160591125489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,float16,0,0.08114719986915589
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,float16,0,0.20119841098785402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,fp8,0,0.07399680018424988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,40,128,1,fp8,fp8,0,0.0731328010559082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,float16,0,0.06801599860191346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,fp8,0,0.06400319933891296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,1,128,1,fp8,fp8,0,0.06420480012893677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,float16,0,0.06674240231513977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,fp8,0,0.06449120044708252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,2,128,1,fp8,fp8,0,0.06404320001602173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,float16,0,0.06922399997711182
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,fp8,0,0.06384320259094238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,4,128,1,fp8,fp8,0,0.06516320109367371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,float16,0,0.1106943964958191
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,fp8,0,0.06509280204772949
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,float16,0,0.1102992057800293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,8,128,1,fp8,fp8,0,0.06481919884681701
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,fp8,0,1.6014400482177735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,float16,0,1.6491615295410156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,float16,0,0.06855040192604064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,float16,0,1.7168304443359375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,fp8,0,1.6992351531982421
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,2,128,1,fp8,fp8,0,1.598532772064209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,1,128,1,fp8,fp8,0,1.6213392257690429
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,fp8,0,1.6055328369140625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,float16,0,2.0650400161743163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,4,128,1,fp8,fp8,0,1.5963567733764648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,float16,0,1.7034143447875976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,fp8,0,1.6948671340942383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,float16,0,1.1338879585266113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,fp8,0,1.0408656120300293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,40,128,1,fp8,fp8,0,1.0352848052978516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,float16,0,0.8279727935791016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,fp8,0,0.8241776466369629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,1,128,1,fp8,fp8,0,0.8916480064392089
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,float16,0,0.8347632408142089
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,2,128,1,fp8,fp8,0,0.8156559944152832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,8,128,1,fp8,fp8,0,1.5950079917907716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,float16,0,0.9256128311157227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,fp8,0,0.8144864082336426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,4,128,1,fp8,fp8,0,0.814094352722168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,float16,0,0.9149519920349121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,8,128,1,fp8,fp8,0,0.810694408416748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,fp8,0,0.5279280185699463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,float16,0,0.672976016998291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,40,128,1,fp8,fp8,0,0.5285200119018555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,float16,0,0.5331295967102051
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,fp8,0,0.823033618927002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,fp8,0,0.4217872142791748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,1,128,1,fp8,fp8,0,0.431217622756958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,float16,0,0.4402048110961914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,fp8,0,0.42100000381469727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,2,128,1,fp8,fp8,0,0.43025760650634765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,float16,0,0.44739999771118166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,fp8,0,0.4197231769561768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,4,128,1,fp8,fp8,0,0.42632322311401366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,float16,0,0.46204638481140137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,fp8,0,0.8108127593994141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,fp8,0,0.4194960117340088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,8,128,1,fp8,fp8,0,0.42469282150268556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,fp8,0,0.27753920555114747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,float16,0,0.3029695987701416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,40,128,1,fp8,fp8,0,0.2784015893936157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,float16,0,0.23241119384765624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,fp8,0,0.22495520114898682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,1,128,1,fp8,fp8,0,0.22794079780578613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,float16,0,0.2305759906768799
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,fp8,0,0.22603681087493896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,2,128,1,fp8,fp8,0,0.224564790725708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,float16,0,0.24004321098327636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,fp8,0,0.22453761100769043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,4,128,1,fp8,fp8,0,0.2256688117980957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,float16,0,0.24146718978881837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,fp8,0,0.22641439437866212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,fp8,0,0.1521936058998108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,40,128,1,fp8,fp8,0,0.15292320251464844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,fp8,0,0.1240015983581543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,1,128,1,fp8,fp8,0,0.12346880435943604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,float16,0,0.1312175989151001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,fp8,0,0.12365280389785767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,2,128,1,fp8,fp8,0,0.1241871953010559
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,float16,0,0.13227200508117676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,fp8,0,0.12326719760894775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,4,128,1,fp8,fp8,0,0.12412799596786499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,float16,0,0.13619519472122193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,fp8,0,0.12429759502410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,8,128,1,fp8,fp8,0,0.12411359548568726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,8,128,1,fp8,fp8,0,0.22273759841918944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,float16,0,0.09391679763793945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,float16,0,0.12800960540771483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,40,128,1,fp8,fp8,0,0.08694559931755066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,float16,0,0.07313439846038819
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,fp8,0,0.07284160256385804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,1,128,1,fp8,fp8,0,0.07130399942398072
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,float16,0,0.07460319995880127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,2,128,1,fp8,fp8,0,0.07281919717788696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,float16,0,0.07588160037994385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,fp8,0,0.0720575988292694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,4,128,1,fp8,fp8,0,0.07179520130157471
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,float16,0,0.07742879986763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,fp8,0,0.07232480049133301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,8,128,1,fp8,fp8,0,0.07227519750595093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,float16,0,0.05567359924316406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,fp8,0,0.05383679866790771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,40,128,1,fp8,fp8,0,0.05416799783706665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,float16,0,0.049534401297569274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,float16,0,0.1639840006828308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,fp8,0,0.047963199019432065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,1,128,1,fp8,fp8,0,0.047779199481010434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,fp8,0,0.08545119762420654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,float16,0,0.049563199281692505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,fp8,0,0.04758079946041107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,2,128,1,fp8,fp8,0,0.047598400712013246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,float16,0,0.049833598732948306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,fp8,0,0.047751998901367186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,4,128,1,fp8,fp8,0,0.04757120013237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,float16,0,0.050588798522949216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,fp8,0,0.07113760113716125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,fp8,0,0.047697600722312924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,8,128,1,fp8,fp8,0,0.047391998767852786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,fp8,0,1.7468496322631837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,float16,0,1.8090831756591796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,fp8,0,1.7413503646850585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,2,128,1,fp8,fp8,0,1.9249664306640626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,1,128,1,fp8,fp8,0,1.764366340637207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,float16,0,1.9645135879516602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,fp8,0,1.7489871978759766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,4,128,1,fp8,fp8,0,1.7424192428588867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,float16,0,1.8306480407714845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,float16,0,1.6905183792114258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,fp8,0,1.7384096145629884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,8,128,1,fp8,fp8,0,1.7499391555786132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,float16,0,1.2503040313720704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,fp8,0,1.1704112052917481
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,40,128,1,fp8,fp8,0,1.1766976356506347
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,float16,0,0.8677519798278809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,fp8,0,0.8877360343933105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,1,128,1,fp8,fp8,0,0.8931119918823243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,float16,0,0.9821647644042969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,2,128,1,fp8,fp8,0,0.8848608016967774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,fp8,0,0.8948224067687989
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,float16,0,0.902683162689209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,fp8,0,0.8820367813110351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,4,128,1,fp8,fp8,0,0.8834287643432617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,float16,0,0.9354207992553711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,fp8,0,0.885155200958252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,float16,0,0.6419360160827636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,8,128,1,fp8,fp8,0,0.8809408187866211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,fp8,0,0.6608736038208007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,float16,0,0.45247998237609866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,fp8,0,0.45554242134094236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,40,128,1,fp8,fp8,0,0.6004303932189942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,1,128,1,fp8,fp8,0,0.5111152172088623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,float16,0,0.45270719528198244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,fp8,0,0.4539360046386719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,2,128,1,fp8,fp8,0,0.45897598266601564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,float16,0,0.4666719913482666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,fp8,0,0.5236544132232666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,4,128,1,fp8,fp8,0,0.4523471832275391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,float16,0,0.48757119178771974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,float16,0,0.33052639961242675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,fp8,0,0.31954240798950195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,40,128,1,fp8,fp8,0,0.3096911907196045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,float16,0,0.2441999912261963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,fp8,0,0.2382416009902954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,1,128,1,fp8,fp8,0,0.24141919612884521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,float16,0,0.23780479431152343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,fp8,0,0.24120960235595704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,2,128,1,fp8,fp8,0,0.23798720836639403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,float16,0,0.2484879970550537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,fp8,0,0.2377631902694702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,4,128,1,fp8,fp8,0,0.2405247926712036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,float16,0,0.2508671998977661
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,fp8,0,0.24037439823150636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,8,128,1,fp8,fp8,0,0.2370352029800415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,float16,0,0.17870399951934815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,fp8,0,0.16577759981155396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,40,128,1,fp8,fp8,0,0.1674288034439087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,float16,0,0.13050559759140015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,fp8,0,0.13035520315170288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,8,128,1,fp8,fp8,0,0.4537631988525391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,float16,0,0.13299360275268554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,fp8,0,0.12815840244293214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,2,128,1,fp8,fp8,0,0.1305951952934265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,float16,0,0.1322767972946167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,fp8,0,0.1307904005050659
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,float16,0,0.1401695966720581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,fp8,0,0.12985919713973998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,8,128,1,fp8,fp8,0,0.12974079847335815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,float16,0,0.09986720085144044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,fp8,0,0.09270880222320557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,40,128,1,fp8,fp8,0,0.09265599846839905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,float16,0,0.07387840151786804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,fp8,0,0.0726688027381897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,1,128,1,fp8,fp8,0,0.07189279794692993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,float16,0,0.07386080026626587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,fp8,0,0.07317759990692138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,1,128,1,fp8,fp8,0,0.12896000146865844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,2,128,1,fp8,fp8,0,0.07295680046081543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,float16,0,0.07394400238990784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,fp8,0,0.07249760031700134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,4,128,1,fp8,fp8,0,0.07145599722862243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,4,128,1,fp8,fp8,0,0.12856800556182862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,fp8,0,0.07175999879837036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,8,128,1,fp8,fp8,0,0.072079998254776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,float16,0,0.05648639798164368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,40,128,1,fp8,fp8,0,0.051999998092651364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,float16,0,0.04365600049495697
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,fp8,0,0.043486401438713074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,1,128,1,fp8,fp8,0,0.04349119961261749
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,float16,0,0.044721600413322446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,fp8,0,0.04364959895610809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,2,128,1,fp8,fp8,0,0.04329920113086701
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,float16,0,0.04531359970569611
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,fp8,0,0.04331679940223694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,4,128,1,fp8,fp8,0,0.04338400065898895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,float16,0,0.04565599858760834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,fp8,0,0.4576735973358154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,8,128,1,fp8,fp8,0,0.043803200125694275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,float16,0,0.03560959994792938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,fp8,0,0.0370608001947403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,float16,0,0.079339200258255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,40,128,1,fp8,fp8,0,0.037110400199890134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,fp8,0,0.032913601398468016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,fp8,0,0.05310080051422119
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,1,128,1,fp8,fp8,0,0.03313600122928619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,float16,0,0.03301439881324768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,fp8,0,0.033046400547027587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,2,128,1,fp8,fp8,0,0.03319840133190155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,float16,0,0.03320800065994263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,fp8,0,0.03308959901332855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,4,128,1,fp8,fp8,0,0.033188799023628236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,float16,0,0.03346399962902069
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,fp8,0,0.033022400736808774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,8,128,1,fp8,fp8,0,0.033030399680137636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,float16,0,0.033211201429367065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,1,128,1,float16,float16,0,1.2939567565917969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,1,128,1,float16,fp8,0,1.370251178741455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,1,128,1,fp8,fp8,0,1.3769712448120117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,fp8,0,0.04375520050525665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,2,128,1,float16,float16,0,1.2855999946594239
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,2,128,1,float16,fp8,0,1.371288013458252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,2,128,1,fp8,fp8,0,1.358187198638916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,4,128,1,float16,float16,0,1.311347198486328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,4,128,1,float16,fp8,0,1.604020881652832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,4,128,1,fp8,fp8,0,1.3695903778076173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,8,128,1,float16,float16,0,1.3998815536499023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,8,128,1,float16,fp8,0,1.3939647674560547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,8,128,1,fp8,fp8,0,1.3599648475646973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,40,128,1,float16,float16,0,1.0394672393798827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,40,128,1,float16,fp8,0,0.989020824432373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,40,128,1,fp8,fp8,0,0.970468807220459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,1,128,1,float16,float16,0,0.6613071918487549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,1,128,1,float16,fp8,0,0.6996960163116455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,2,128,1,float16,float16,0,0.6514800071716309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,2,128,1,float16,fp8,0,0.6946335792541504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,2,128,1,fp8,fp8,0,0.696895980834961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,4,128,1,float16,float16,0,0.6675648212432861
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,4,128,1,float16,fp8,0,0.7112415790557861
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,4,128,1,fp8,fp8,0,0.6950784206390381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,8,128,1,float16,float16,0,0.7041903972625733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,8,128,1,float16,fp8,0,0.689140796661377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,8,128,1,fp8,fp8,0,0.6920176029205323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,40,128,1,float16,float16,0,0.5254191875457763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,40,128,1,float16,fp8,0,0.49772157669067385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,1,128,1,float16,float16,0,0.33764801025390623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,40,128,1,fp8,fp8,0,0.4984288215637207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,1,128,1,float16,fp8,0,0.35969278812408445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,1,128,1,fp8,fp8,0,0.3558160066604614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,1,128,1,fp8,fp8,0,0.6926191806793213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,2,128,1,float16,fp8,0,0.3588383913040161
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,2,128,1,fp8,fp8,0,0.3549887895584106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,4,128,1,float16,float16,0,0.34596478939056396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,4,128,1,float16,fp8,0,0.3562479972839355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,4,128,1,fp8,fp8,0,0.35415360927581785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,8,128,1,float16,float16,0,0.36513121128082277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,8,128,1,fp8,fp8,0,0.3531408071517944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,40,128,1,float16,float16,0,0.27095680236816405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,40,128,1,float16,fp8,0,0.2565088033676147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,40,128,1,fp8,fp8,0,0.25545439720153806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,1,128,1,float16,float16,0,0.1781808018684387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,1,128,1,float16,fp8,0,0.1854431986808777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,1,128,1,fp8,fp8,0,0.1868896007537842
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,2,128,1,float16,float16,0,0.17831679582595825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,2,128,1,float16,fp8,0,0.18577439785003663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,2,128,1,fp8,fp8,0,0.1850640058517456
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,4,128,1,float16,float16,0,0.18332159519195557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,4,128,1,float16,fp8,0,0.18534719944000244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,4,128,1,fp8,fp8,0,0.18548959493637085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,2,128,1,float16,float16,0,0.3409199953079224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,8,128,1,float16,fp8,0,0.18488320112228393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,8,128,1,fp8,fp8,0,0.1866655945777893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,40,128,1,float16,fp8,0,0.1377552032470703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,40,128,1,fp8,fp8,0,0.13769279718399047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,8,128,1,float16,fp8,0,0.354201602935791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,1,128,1,float16,float16,0,0.10024640560150147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,1,128,1,float16,fp8,0,0.10217759609222413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,1,128,1,fp8,fp8,0,0.10176000595092774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,2,128,1,float16,float16,0,0.10070559978485108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,2,128,1,float16,fp8,0,0.1019327998161316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,2,128,1,fp8,fp8,0,0.10147360563278199
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,4,128,1,float16,float16,0,0.10223360061645508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,4,128,1,float16,fp8,0,0.10227040052413941
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,4,128,1,fp8,fp8,0,0.10288800001144409
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,8,128,1,float16,float16,0,0.10729600191116333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,8,128,1,float16,fp8,0,0.10149279832839966
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,40,128,1,float16,float16,0,0.08041920065879822
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,40,128,1,float16,fp8,0,0.07612320184707641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,40,128,1,fp8,fp8,0,0.07612640261650086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,1,128,1,float16,float16,0,0.05403040051460266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,8,128,1,float16,float16,0,0.1925088047981262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,1,128,1,float16,fp8,0,0.055871999263763426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,1,128,1,fp8,fp8,0,0.05557760000228882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,40,128,1,float16,float16,0,0.14435839653015137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,2,128,1,float16,fp8,0,0.055638402700424194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,2,128,1,fp8,fp8,0,0.055542397499084475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,4,128,1,float16,float16,0,0.056086397171020506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,4,128,1,float16,fp8,0,0.05583360195159912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,8,128,1,float16,float16,0,0.05948479771614075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,8,128,1,float16,fp8,0,0.055499202013015746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,8,128,1,fp8,fp8,0,0.055471998453140256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,40,128,1,float16,float16,0,0.04728800058364868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,40,128,1,float16,fp8,0,0.04335359930992126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,40,128,1,fp8,fp8,0,0.043931201100349426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,1,128,1,float16,float16,0,0.03503200113773346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,8,128,1,fp8,fp8,0,0.1023136019706726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,1,128,1,float16,fp8,0,0.03519839942455292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,1,128,1,fp8,fp8,0,0.035359999537467955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,2,128,1,float16,float16,0,0.0352975994348526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,2,128,1,float16,fp8,0,0.035540801286697385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,2,128,1,fp8,fp8,0,0.035703998804092404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,2,128,1,float16,float16,0,0.05481439828872681
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,4,128,1,float16,float16,0,0.0358240008354187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,4,128,1,float16,fp8,0,0.035392001271247864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,4,128,1,fp8,fp8,0,0.035211199522018434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,8,128,1,float16,float16,0,0.03744480013847351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,8,128,1,float16,fp8,0,0.035364800691604616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,8,128,1,fp8,fp8,0,0.035571199655532834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,40,128,1,float16,fp8,0,0.03113119900226593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,40,128,1,fp8,fp8,0,0.031147199869155883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,1,128,1,float16,float16,0,0.027116799354553224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,1,128,1,float16,fp8,0,0.027105599641799927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,1,128,1,fp8,fp8,0,0.026950401067733765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,2,128,1,float16,float16,0,0.026958400011062623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,2,128,1,float16,fp8,0,0.026972800493240356
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,2,128,1,fp8,fp8,0,0.027062401175498962
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,4,128,1,float16,float16,0,0.027156800031661987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,4,128,1,float16,fp8,0,0.027326399087905885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,4,128,1,fp8,fp8,0,0.027143999934196472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,8,128,1,float16,float16,0,0.026892799139022826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,8,128,1,float16,fp8,0,0.027036800980567932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,8,128,1,fp8,fp8,0,0.026894399523735048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,40,128,1,float16,float16,0,0.020951999723911284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,40,128,1,float16,fp8,0,0.02072480022907257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,4,128,1,fp8,fp8,0,0.05614240169525146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,40,128,1,fp8,fp8,0,0.02090719938278198
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,40,128,1,float16,float16,0,0.02921760082244873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,1,128,1,fp8,fp8,0,0.01879200041294098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,2,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,2,128,1,fp8,fp8,0,0.01865279972553253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,4,128,1,float16,float16,0,0.018775999546051025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,4,128,1,float16,fp8,0,0.018783999979496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,4,128,1,fp8,fp8,0,0.018643200397491455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,8,128,1,float16,float16,0,0.018638400733470915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,8,128,1,float16,fp8,0,0.018862399458885192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,8,128,1,fp8,fp8,0,0.018755200505256652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,1,128,1,float16,float16,0,0.5398831844329834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,1,128,1,float16,fp8,0,0.5895103931427002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,1,128,1,float16,float16,0,0.018675200641155243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,1,128,1,fp8,fp8,0,0.5889855861663819
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,1,128,1,float16,fp8,0,0.018724800646305086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,2,128,1,float16,fp8,0,0.01875839978456497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,2,128,1,float16,float16,0,0.5329296112060546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,2,128,1,float16,fp8,0,0.5889887809753418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,2,128,1,fp8,fp8,0,0.5846735954284668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,4,128,1,float16,float16,0,0.5563151836395264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,4,128,1,float16,fp8,0,0.5830992221832275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,8,128,1,float16,float16,0,0.58711838722229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,4,128,1,fp8,fp8,0,0.5878911972045898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,8,128,1,float16,fp8,0,0.5871967792510986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,8,128,1,fp8,fp8,0,0.5821248054504394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,40,128,1,float16,float16,0,0.4630879878997803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,40,128,1,float16,fp8,0,0.4418479919433594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,40,128,1,fp8,fp8,0,0.4416287899017334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,1,128,1,float16,float16,0,0.2758415937423706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,1,128,1,float16,fp8,0,0.3
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,1,128,1,fp8,fp8,0,0.30177280902862547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,2,128,1,float16,float16,0,0.27763519287109373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,2,128,1,fp8,fp8,0,0.29949278831481935
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,4,128,1,float16,float16,0,0.2871056079864502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,4,128,1,float16,fp8,0,0.3018111944198608
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,4,128,1,fp8,fp8,0,0.29900479316711426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,8,128,1,float16,float16,0,0.3058640003204346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,8,128,1,float16,fp8,0,0.3016047954559326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,8,128,1,fp8,fp8,0,0.2981935977935791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,40,128,1,float16,float16,0,0.23987839221954346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,40,128,1,float16,fp8,0,0.22870879173278807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,40,128,1,fp8,fp8,0,0.2267199993133545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,1,128,1,float16,float16,0,0.14671679735183715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,1,128,1,float16,fp8,0,0.15651359558105468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,1,128,1,fp8,fp8,0,0.15578240156173706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,2,128,1,float16,fp8,0,0.1561087965965271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,2,128,1,fp8,fp8,0,0.15523359775543213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,4,128,1,float16,float16,0,0.15035840272903442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,4,128,1,float16,fp8,0,0.1543023943901062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,4,128,1,fp8,fp8,0,0.1559216022491455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,8,128,1,float16,float16,0,0.15845760107040405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,8,128,1,float16,fp8,0,0.15618239641189574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,8,128,1,fp8,fp8,0,0.1545807957649231
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,40,128,1,float16,float16,0,0.12812960147857666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,40,128,1,float16,fp8,0,0.12126719951629639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,40,128,1,fp8,fp8,0,0.12216639518737793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,1,128,1,float16,float16,0,0.08046879768371581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,1,128,1,float16,fp8,0,0.08610720038414002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,1,128,1,fp8,fp8,0,0.08449280261993408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,2,128,1,float16,float16,0,0.08170239925384522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,2,128,1,float16,fp8,0,0.08467199802398681
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,2,128,1,fp8,fp8,0,0.08650559782981873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,4,128,1,float16,float16,0,0.08268160223960877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,4,128,1,float16,fp8,0,0.08640639781951905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,4,128,1,fp8,fp8,0,0.08511360287666321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,8,128,1,float16,float16,0,0.08966400027275086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,8,128,1,float16,fp8,0,0.0847216010093689
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,8,128,1,fp8,fp8,0,0.08642879724502564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,40,128,1,float16,float16,0,0.07241920232772828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,40,128,1,float16,fp8,0,0.06966400146484375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,2,128,1,float16,float16,0,0.14658080339431762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,40,128,1,fp8,fp8,0,0.06968960165977478
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,1,128,1,float16,float16,0,0.04737919867038727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,1,128,1,float16,fp8,0,0.04949440062046051
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,1,128,1,fp8,fp8,0,0.04960800111293793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,2,128,1,float16,float16,0,0.047644799947738646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,2,128,1,float16,fp8,0,0.049527999758720395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,4,128,1,float16,float16,0,0.049579200148582456
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,4,128,1,float16,fp8,0,0.049553599953651425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,4,128,1,fp8,fp8,0,0.04955999851226807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,8,128,1,float16,float16,0,0.052985602617263795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,2,128,1,float16,fp8,0,0.2996783971786499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,8,128,1,float16,fp8,0,0.04957759976387024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,8,128,1,fp8,fp8,0,0.04937599897384644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,40,128,1,float16,float16,0,0.044014400243759154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,40,128,1,fp8,fp8,0,0.041315200924873355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,1,128,1,float16,float16,0,0.031167998909950256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,1,128,1,float16,fp8,0,0.033057600259780884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,1,128,1,fp8,fp8,0,0.033283200860023496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,2,128,1,float16,float16,0,0.0312032014131546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,2,128,1,float16,fp8,0,0.033155199885368344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,2,128,1,fp8,fp8,0,0.033267199993133545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,4,128,1,float16,float16,0,0.03287039995193482
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,4,128,1,float16,fp8,0,0.033083200454711914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,2,128,1,fp8,fp8,0,0.05108320116996765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,4,128,1,fp8,fp8,0,0.0331167995929718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,8,128,1,float16,float16,0,0.03330720067024231
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,8,128,1,fp8,fp8,0,0.03309600055217743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,40,128,1,float16,float16,0,0.026956799626350402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,40,128,1,float16,fp8,0,0.02895039916038513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,40,128,1,fp8,fp8,0,0.028939199447631837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,40,128,1,float16,fp8,0,0.04121600091457367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,1,128,1,float16,float16,0,0.02298240065574646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,1,128,1,float16,fp8,0,0.023020799458026885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,1,128,1,fp8,fp8,0,0.024801599979400634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,2,128,1,float16,fp8,0,0.022918400168418885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,2,128,1,fp8,fp8,0,0.022947199642658234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,4,128,1,float16,fp8,0,0.02303680032491684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,4,128,1,fp8,fp8,0,0.023000000417232512
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,8,128,1,float16,float16,0,0.024771200120449068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,8,128,1,float16,fp8,0,0.02293439954519272
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,8,128,1,fp8,fp8,0,0.023175999522209167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,40,128,1,float16,float16,0,0.01871200054883957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,40,128,1,float16,fp8,0,0.018884800374507904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,40,128,1,fp8,fp8,0,0.018904000520706177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,1,128,1,float16,float16,0,0.016673600673675536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,8,128,1,float16,fp8,0,0.03299199938774109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,1,128,1,float16,fp8,0,0.016737599670886994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,1,128,1,fp8,fp8,0,0.01664000004529953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,2,128,1,float16,float16,0,0.01672160029411316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,2,128,1,float16,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,2,128,1,fp8,fp8,0,0.016758400201797485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,4,128,1,float16,float16,0,0.01666879951953888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,4,128,1,float16,fp8,0,0.016788800060749055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,4,128,1,fp8,fp8,0,0.016681599617004394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,8,128,1,float16,float16,0,0.016840000450611115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,8,128,1,float16,fp8,0,0.01675039976835251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,8,128,1,fp8,fp8,0,0.016663999855518342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,40,128,1,float16,float16,0,0.016820800304412842
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,40,128,1,float16,fp8,0,0.016777600347995757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,40,128,1,fp8,fp8,0,0.01677280068397522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,2,128,1,float16,float16,0,0.022836799919605254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,1,128,1,float16,float16,0,0.016569599509239197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,1,128,1,fp8,fp8,0,0.016579200327396394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,2,128,1,float16,float16,0,0.01640319973230362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,2,128,1,float16,fp8,0,0.01587360054254532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,1,128,1,float16,fp8,0,0.016545599699020384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,4,128,1,float16,float16,0,0.016044799983501435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,4,128,1,float16,float16,0,0.022916799783706664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,4,128,1,fp8,fp8,0,0.015167999267578124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,8,128,1,float16,fp8,0,0.015251199901103973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,8,128,1,float16,float16,0,0.01664000004529953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,8,128,1,fp8,fp8,0,0.016569599509239197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,1,128,1,float16,float16,0,0.32591359615325927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,1,128,1,float16,fp8,0,0.35155999660491943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,1,128,1,fp8,fp8,0,0.3510767936706543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,2,128,1,float16,float16,0,0.32574241161346434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,2,128,1,float16,fp8,0,0.35065760612487795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,2,128,1,fp8,fp8,0,0.3500224113464355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,4,128,1,float16,float16,0,0.33417439460754395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,4,128,1,fp8,fp8,0,0.34886720180511477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,8,128,1,float16,float16,0,0.3517040014266968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,4,128,1,float16,fp8,0,0.016568000614643096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,8,128,1,float16,fp8,0,0.34840641021728513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,8,128,1,fp8,fp8,0,0.3510351896286011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,40,128,1,float16,float16,0,0.2602799892425537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,40,128,1,float16,fp8,0,0.25110559463500975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,40,128,1,fp8,fp8,0,0.2503727912902832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,1,128,1,float16,float16,0,0.17086880207061766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,1,128,1,float16,fp8,0,0.18083360195159912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,1,128,1,fp8,fp8,0,0.18214399814605714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,2,128,1,float16,float16,0,0.16931840181350707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,2,128,1,float16,fp8,0,0.1808527946472168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,2,128,1,fp8,fp8,0,0.18074079751968383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,4,128,1,float16,fp8,0,0.34916958808898924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,4,128,1,fp8,fp8,0,0.18065600395202636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,8,128,1,float16,float16,0,0.18292479515075682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,8,128,1,float16,fp8,0,0.18061439990997313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,2,128,1,fp8,fp8,0,0.01656319946050644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,8,128,1,fp8,fp8,0,0.1802896022796631
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,40,128,1,float16,float16,0,0.13367199897766113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,40,128,1,float16,fp8,0,0.13050559759140015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,40,128,1,fp8,fp8,0,0.1301568031311035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,1,128,1,float16,float16,0,0.09098880290985108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,1,128,1,float16,fp8,0,0.0949343979358673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,1,128,1,fp8,fp8,0,0.09488639831542969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,2,128,1,float16,float16,0,0.09166880249977112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,2,128,1,float16,fp8,0,0.09475200176239014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,4,128,1,float16,float16,0,0.17581119537353515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,2,128,1,fp8,fp8,0,0.09462720155715942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,4,128,1,float16,float16,0,0.09415680170059204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,4,128,1,fp8,fp8,0,0.09608479738235473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,8,128,1,float16,float16,0,0.09935839772224427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,8,128,1,float16,fp8,0,0.09654240012168884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,8,128,1,fp8,fp8,0,0.09643840193748474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,40,128,1,float16,float16,0,0.07353439927101135
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,40,128,1,float16,fp8,0,0.0723743975162506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,40,128,1,fp8,fp8,0,0.07211520075798035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,1,128,1,float16,float16,0,0.05020639896392822
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,1,128,1,float16,fp8,0,0.05341759920120239
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,1,128,1,fp8,fp8,0,0.05334399938583374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,2,128,1,float16,float16,0,0.051072001457214355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,2,128,1,float16,fp8,0,0.053857600688934325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,4,128,1,float16,fp8,0,0.1806720018386841
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,2,128,1,fp8,fp8,0,0.05363839864730835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,4,128,1,float16,fp8,0,0.053574401140213015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,4,128,1,float16,float16,0,0.052348798513412474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,4,128,1,fp8,fp8,0,0.05390560030937195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,8,128,1,float16,float16,0,0.05570240020751953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,8,128,1,float16,fp8,0,0.05335680246353149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,40,128,1,float16,float16,0,0.0435696005821228
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,8,128,1,fp8,fp8,0,0.053887999057769774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,40,128,1,float16,fp8,0,0.041596800088882446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,40,128,1,fp8,fp8,0,0.041305598616600034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,1,128,1,float16,float16,0,0.03144960105419159
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,1,128,1,fp8,fp8,0,0.03198559880256653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,1,128,1,float16,fp8,0,0.0313728004693985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,2,128,1,float16,fp8,0,0.03221119940280914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,2,128,1,fp8,fp8,0,0.031446400284767154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,4,128,1,float16,float16,0,0.031115201115608216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,4,128,1,float16,fp8,0,0.03143360018730164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,8,128,1,float16,float16,0,0.03149439990520477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,8,128,1,float16,fp8,0,0.031227201223373413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,8,128,1,fp8,fp8,0,0.03129439949989319
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,40,128,1,float16,float16,0,0.025012800097465517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,40,128,1,float16,fp8,0,0.02704159915447235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,40,128,1,fp8,fp8,0,0.02705279886722565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,1,128,1,float16,float16,0,0.020875200629234314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,1,128,1,float16,fp8,0,0.022096000611782074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,1,128,1,fp8,fp8,0,0.022655999660491942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,2,128,1,float16,float16,0,0.020951999723911284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,2,128,1,float16,fp8,0,0.021209600567817687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,2,128,1,fp8,fp8,0,0.02081120014190674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,4,128,1,float16,float16,0,0.02075359970331192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,2,128,1,float16,float16,0,0.03139840066432953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,4,128,1,float16,fp8,0,0.020931200683116914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,4,128,1,fp8,fp8,0,0.022430400550365447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,8,128,1,float16,float16,0,0.022779199481010436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,4,128,1,fp8,fp8,0,0.031784000992774966
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,8,128,1,float16,fp8,0,0.022519999742507936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,8,128,1,fp8,fp8,0,0.02205120027065277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,40,128,1,float16,float16,0,0.018592000007629395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,40,128,1,float16,fp8,0,0.018799999356269838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,1,128,1,float16,float16,0,0.01674560010433197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,40,128,1,fp8,fp8,0,0.018780800700187682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,1,128,1,fp8,fp8,0,0.01674720048904419
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,2,128,1,float16,float16,0,0.016606399416923524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,2,128,1,float16,fp8,0,0.01679999977350235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,2,128,1,fp8,fp8,0,0.01658080071210861
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,4,128,1,float16,float16,0,0.016760000586509706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,4,128,1,float16,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,4,128,1,fp8,fp8,0,0.016836799681186676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,8,128,1,float16,float16,0,0.016655999422073364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,8,128,1,float16,fp8,0,0.016710400581359863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,8,128,1,fp8,fp8,0,0.016680000722408293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,40,128,1,float16,fp8,0,0.012580800056457519
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,40,128,1,fp8,fp8,0,0.012617599964141846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,4,128,1,float16,fp8,0,0.09450560212135314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,1,128,1,float16,float16,0,0.01249919980764389
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,1,128,1,float16,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,1,128,1,fp8,fp8,0,0.012606400251388549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,2,128,1,float16,fp8,0,0.012620800733566284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,2,128,1,fp8,fp8,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,4,128,1,float16,float16,0,0.012694400548934937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,4,128,1,float16,fp8,0,0.012614400684833526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,4,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,8,128,1,float16,float16,0,0.01255359947681427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,8,128,1,float16,fp8,0,0.012615999579429627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,8,128,1,fp8,fp8,0,0.01260959953069687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,40,128,1,float16,float16,0,0.012574400007724761
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,1,128,1,float16,fp8,0,0.016867199540138246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,40,128,1,float16,fp8,0,0.012652799487113953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,40,128,1,fp8,fp8,0,0.012705600261688233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,1,128,1,float16,float16,0,0.012697599828243256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,1,128,1,float16,fp8,0,0.012574400007724761
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,2,128,1,float16,float16,0,0.012435200065374375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,2,128,1,float16,fp8,0,0.012465599924325943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,40,128,1,float16,float16,0,0.01430879980325699
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,2,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,4,128,1,float16,float16,0,0.012574400007724761
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,4,128,1,float16,fp8,0,0.012649600207805634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,4,128,1,fp8,fp8,0,0.01255200058221817
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,8,128,1,float16,float16,0,0.012561599910259246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,2,128,1,float16,float16,0,0.012494400143623352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,8,128,1,float16,fp8,0,0.012585599720478059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,1,128,1,float16,float16,0,0.25471360683441163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,1,128,1,float16,fp8,0,0.26375679969787597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,1,128,1,fp8,fp8,0,0.2641151905059814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,2,128,1,float16,float16,0,0.25365118980407714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,1,128,1,fp8,fp8,0,0.012571200728416443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,2,128,1,float16,fp8,0,0.2630959987640381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,4,128,1,float16,float16,0,0.2585472106933594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,4,128,1,float16,fp8,0,0.26278080940246584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,8,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,8,128,1,float16,float16,0,0.26449439525604246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,8,128,1,float16,fp8,0,0.2606208086013794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,8,128,1,fp8,fp8,0,0.26056480407714844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,40,128,1,float16,fp8,0,0.17047040462493895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,40,128,1,float16,float16,0,0.17670719623565673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,40,128,1,fp8,fp8,0,0.17075999975204467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,1,128,1,float16,float16,0,0.13364160060882568
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,1,128,1,float16,fp8,0,0.13608319759368898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,1,128,1,fp8,fp8,0,0.1377776026725769
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,2,128,1,float16,float16,0,0.1318336009979248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,2,128,1,fp8,fp8,0,0.26132960319519044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,2,128,1,fp8,fp8,0,0.1356768012046814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,4,128,1,float16,float16,0,0.13495839834213258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,4,128,1,fp8,fp8,0,0.13554879426956176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,4,128,1,fp8,fp8,0,0.26264638900756837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,8,128,1,float16,float16,0,0.13912320137023926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,8,128,1,float16,fp8,0,0.13585920333862306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,8,128,1,fp8,fp8,0,0.13562400341033937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,40,128,1,float16,float16,0,0.09182559847831726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,40,128,1,float16,fp8,0,0.09049280285835266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,40,128,1,fp8,fp8,0,0.09036960005760193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,1,128,1,float16,float16,0,0.07020959854125977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,1,128,1,fp8,fp8,0,0.07177919745445252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,2,128,1,float16,float16,0,0.07010239958763123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,2,128,1,float16,fp8,0,0.0719968020915985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,2,128,1,fp8,fp8,0,0.07208319902420043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,4,128,1,float16,float16,0,0.07182239890098571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,4,128,1,float16,fp8,0,0.07197279930114746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,2,128,1,float16,fp8,0,0.1375264048576355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,4,128,1,fp8,fp8,0,0.07201600074768066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,8,128,1,float16,float16,0,0.0755504012107849
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,8,128,1,float16,fp8,0,0.07221919894218445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,4,128,1,float16,fp8,0,0.1365407943725586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,8,128,1,fp8,fp8,0,0.07197440266609192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,40,128,1,float16,float16,0,0.051819199323654176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,40,128,1,float16,fp8,0,0.0494159996509552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,40,128,1,fp8,fp8,0,0.04968799948692322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,1,128,1,float16,fp8,0,0.04118880033493042
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,2,128,1,float16,float16,0,0.03914720118045807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,1,128,1,fp8,fp8,0,0.04131839871406555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,2,128,1,float16,fp8,0,0.040696001052856444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,2,128,1,fp8,fp8,0,0.041529598832130435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,4,128,1,float16,float16,0,0.03940640091896057
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,4,128,1,float16,fp8,0,0.04130719900131226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,4,128,1,fp8,fp8,0,0.04122079908847809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,8,128,1,float16,fp8,0,0.04112319946289063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,8,128,1,fp8,fp8,0,0.041140800714492796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,1,128,1,float16,fp8,0,0.07199199795722962
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,40,128,1,float16,float16,0,0.0272816002368927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,40,128,1,float16,fp8,0,0.03094879984855652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,40,128,1,fp8,fp8,0,0.03064799904823303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,1,128,1,float16,float16,0,0.025003200769424437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,1,128,1,float16,fp8,0,0.024868799746036528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,1,128,1,fp8,fp8,0,0.02640799880027771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,2,128,1,float16,float16,0,0.024772800505161285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,2,128,1,float16,fp8,0,0.026811200380325317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,4,128,1,float16,float16,0,0.025113600492477416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,4,128,1,float16,fp8,0,0.026926401257514953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,4,128,1,fp8,fp8,0,0.026627200841903686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,8,128,1,float16,float16,0,0.026870399713516235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,8,128,1,float16,fp8,0,0.024967999756336214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,8,128,1,fp8,fp8,0,0.025150400400161744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,40,128,1,float16,float16,0,0.018785600364208222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,40,128,1,float16,fp8,0,0.020713600516319274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,40,128,1,fp8,fp8,0,0.020771199464797975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,1,128,1,float16,float16,0,0.04009119868278503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,1,128,1,float16,float16,0,0.018628799915313722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,1,128,1,float16,fp8,0,0.01920959949493408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,1,128,1,fp8,fp8,0,0.018587200343608855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,2,128,1,float16,float16,0,0.01868959963321686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,2,128,1,float16,fp8,0,0.018628799915313722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,2,128,1,fp8,fp8,0,0.018779200315475465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,4,128,1,float16,float16,0,0.018587200343608855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,4,128,1,float16,fp8,0,0.01867839992046356
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,8,128,1,float16,float16,0,0.04143680036067963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,8,128,1,float16,float16,0,0.01865279972553253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,8,128,1,float16,fp8,0,0.018614399433135986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,8,128,1,fp8,fp8,0,0.01865279972553253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,40,128,1,float16,float16,0,0.01626719981431961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,40,128,1,float16,fp8,0,0.015132799744606018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,40,128,1,fp8,fp8,0,0.01646080017089844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,1,128,1,float16,float16,0,0.014662399888038635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,1,128,1,float16,fp8,0,0.014665600657463074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,1,128,1,fp8,fp8,0,0.01462240070104599
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,2,128,1,fp8,fp8,0,0.024886399507522583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,2,128,1,float16,fp8,0,0.014612799882888794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,2,128,1,fp8,fp8,0,0.01464959979057312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,4,128,1,float16,float16,0,0.014587199687957764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,4,128,1,float16,fp8,0,0.014735999703407287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,4,128,1,fp8,fp8,0,0.01465280055999756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,8,128,1,float16,float16,0,0.014628799259662628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,8,128,1,float16,fp8,0,0.014633600413799287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,8,128,1,fp8,fp8,0,0.0147024005651474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,40,128,1,float16,float16,0,0.013212800025939941
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,40,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,40,128,1,fp8,fp8,0,0.012665599584579468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,4,128,1,fp8,fp8,0,0.018636800348758698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,1,128,1,float16,float16,0,0.010633599758148194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,1,128,1,float16,fp8,0,0.011283200234174728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,1,128,1,fp8,fp8,0,0.010599999874830245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,2,128,1,float16,fp8,0,0.010609599947929382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,2,128,1,fp8,fp8,0,0.01064160019159317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,2,128,1,float16,float16,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,4,128,1,float16,fp8,0,0.012547199428081513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,4,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,8,128,1,float16,float16,0,0.010657600313425063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,8,128,1,float16,fp8,0,0.011238399893045425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,8,128,1,fp8,fp8,0,0.011380799859762192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,40,128,1,float16,float16,0,0.012678399682044983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,40,128,1,float16,fp8,0,0.01271200031042099
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,40,128,1,fp8,fp8,0,0.012612800300121307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,1,128,1,float16,float16,0,0.010683199763298035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,1,128,1,float16,fp8,0,0.010699199885129929
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,1,128,1,fp8,fp8,0,0.010819199681282043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,2,128,1,float16,float16,0,0.010716799646615982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,2,128,1,float16,fp8,0,0.010611200332641601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,4,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,4,128,1,float16,fp8,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,4,128,1,fp8,fp8,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,8,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,8,128,1,float16,fp8,0,0.010564800351858139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,8,128,1,fp8,fp8,0,0.010596799850463866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,1,128,1,float16,float16,0,0.21607520580291747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,1,128,1,float16,fp8,0,0.21955358982086182
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,1,128,1,fp8,fp8,0,0.21963200569152833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,2,128,1,float16,float16,0,0.21573278903961182
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,2,128,1,float16,fp8,0,0.21963839530944823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,2,128,1,fp8,fp8,0,0.21884479522705078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,2,128,1,float16,float16,0,0.010564800351858139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,4,128,1,float16,float16,0,0.21739039421081544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,4,128,1,float16,fp8,0,0.2217344045639038
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,4,128,1,fp8,fp8,0,0.2182159900665283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,4,128,1,float16,float16,0,0.010639999806880952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,8,128,1,float16,fp8,0,0.21833760738372804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,8,128,1,fp8,fp8,0,0.21952319145202637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,40,128,1,float16,float16,0,0.13351839780807495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,40,128,1,float16,fp8,0,0.13190399408340453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,40,128,1,fp8,fp8,0,0.13171520233154296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,1,128,1,float16,float16,0,0.11174880266189575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,1,128,1,float16,fp8,0,0.11309119462966918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,1,128,1,fp8,fp8,0,0.11317280530929566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,2,128,1,float16,float16,0,0.11316159963607789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,2,128,1,float16,fp8,0,0.11318399906158447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,4,128,1,float16,float16,0,0.11534240245819091
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,4,128,1,float16,fp8,0,0.11301280260086059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,4,128,1,fp8,fp8,0,0.1131824016571045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,8,128,1,float16,float16,0,0.1173632025718689
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,2,128,1,fp8,fp8,0,0.010691200196743012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,8,128,1,float16,fp8,0,0.11329439878463746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,8,128,1,fp8,fp8,0,0.11305919885635377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,40,128,1,float16,float16,0,0.07306399941444397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,40,128,1,float16,fp8,0,0.06987839937210083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,1,128,1,float16,float16,0,0.061627197265625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,40,128,1,fp8,fp8,0,0.06969760060310363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,1,128,1,float16,fp8,0,0.06127520203590393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,8,128,1,float16,float16,0,0.22529919147491456
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,1,128,1,fp8,fp8,0,0.060022401809692386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,2,128,1,float16,float16,0,0.06198400259017944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,2,128,1,float16,fp8,0,0.06142399907112121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,2,128,1,fp8,fp8,0,0.06006240248680115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,4,128,1,float16,fp8,0,0.060622400045394896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,4,128,1,fp8,fp8,0,0.05981600284576416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,8,128,1,float16,float16,0,0.06333119869232177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,8,128,1,float16,fp8,0,0.05988320112228394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,8,128,1,fp8,fp8,0,0.060622400045394896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,40,128,1,float16,fp8,0,0.03916319906711578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,2,128,1,fp8,fp8,0,0.11349120140075683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,1,128,1,float16,float16,0,0.03511039912700653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,40,128,1,fp8,fp8,0,0.03927839994430542
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,1,128,1,float16,fp8,0,0.03506560027599335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,1,128,1,fp8,fp8,0,0.035209599137306216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,2,128,1,float16,float16,0,0.035025599598884585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,2,128,1,float16,fp8,0,0.03521920144557953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,2,128,1,fp8,fp8,0,0.03513599932193756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,4,128,1,float16,float16,0,0.03547520041465759
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,4,128,1,float16,fp8,0,0.03511520028114319
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,8,128,1,float16,float16,0,0.03547999858856201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,8,128,1,float16,fp8,0,0.03519999980926514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,8,128,1,fp8,fp8,0,0.03519040048122406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,40,128,1,float16,float16,0,0.024758400022983552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,40,128,1,float16,fp8,0,0.02479040026664734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,40,128,1,fp8,fp8,0,0.024798400700092316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,1,128,1,float16,float16,0,0.022776000201702118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,1,128,1,float16,fp8,0,0.022833600640296936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,1,128,1,fp8,fp8,0,0.022755199670791627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,4,128,1,float16,float16,0,0.061768001317977904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,2,128,1,float16,float16,0,0.02281759977340698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,2,128,1,float16,fp8,0,0.02282879948616028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,2,128,1,fp8,fp8,0,0.022835199534893037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,4,128,1,float16,float16,0,0.022836799919605254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,40,128,1,float16,float16,0,0.037771201133728026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,4,128,1,float16,fp8,0,0.022867199778556824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,8,128,1,float16,float16,0,0.022811199724674224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,8,128,1,float16,fp8,0,0.022753599286079406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,8,128,1,fp8,fp8,0,0.022935999929904936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,40,128,1,float16,float16,0,0.016971200704574585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,40,128,1,float16,fp8,0,0.017980800569057466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,40,128,1,fp8,fp8,0,0.0176816001534462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,1,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,4,128,1,fp8,fp8,0,0.03542239964008331
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,1,128,1,float16,fp8,0,0.016651199758052827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,2,128,1,float16,float16,0,0.016728000342845918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,2,128,1,float16,fp8,0,0.016612799465656282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,2,128,1,fp8,fp8,0,0.016641600430011748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,4,128,1,float16,float16,0,0.016748799383640288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,4,128,1,float16,fp8,0,0.016630400717258454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,4,128,1,fp8,fp8,0,0.016604800522327424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,8,128,1,float16,float16,0,0.016630400717258454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,8,128,1,float16,fp8,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,8,128,1,fp8,fp8,0,0.01679839938879013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,40,128,1,float16,float16,0,0.014644800126552582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,40,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,40,128,1,fp8,fp8,0,0.014444799721240997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,1,128,1,float16,float16,0,0.014416000247001648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,1,128,1,float16,fp8,0,0.01438400000333786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,1,128,1,fp8,fp8,0,0.014507199823856353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,4,128,1,fp8,fp8,0,0.02282560020685196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,2,128,1,float16,fp8,0,0.014056000113487243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,2,128,1,fp8,fp8,0,0.013872000575065612
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,4,128,1,float16,float16,0,0.012984000146389008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,4,128,1,float16,fp8,0,0.014078399538993836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,8,128,1,float16,float16,0,0.013259199261665345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,4,128,1,fp8,fp8,0,0.013331200182437896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,8,128,1,float16,fp8,0,0.01393119990825653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,8,128,1,fp8,fp8,0,0.012988799810409546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,1,128,1,fp8,fp8,0,0.016950400173664094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,40,128,1,float16,float16,0,0.012572799623012543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,40,128,1,float16,fp8,0,0.010815999656915664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,40,128,1,fp8,fp8,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,1,128,1,float16,float16,0,0.010627199709415436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,1,128,1,float16,fp8,0,0.011073599755764007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,1,128,1,fp8,fp8,0,0.010625600069761276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,2,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,2,128,1,float16,fp8,0,0.010870400071144103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,4,128,1,float16,float16,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,2,128,1,fp8,fp8,0,0.01067039966583252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,4,128,1,float16,fp8,0,0.01077599972486496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,4,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,8,128,1,float16,float16,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,8,128,1,float16,fp8,0,0.010875199735164643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,40,128,1,float16,float16,0,0.012203200161457062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,40,128,1,float16,fp8,0,0.010758399963378906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,40,128,1,fp8,fp8,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,1,128,1,float16,float16,0,0.010633599758148194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,1,128,1,float16,fp8,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,1,128,1,fp8,fp8,0,0.010811199992895126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,2,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,2,128,1,float16,fp8,0,0.010745599865913391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,2,128,1,fp8,fp8,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,4,128,1,float16,fp8,0,0.01064160019159317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,2,128,1,float16,float16,0,0.014407999813556671
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,8,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,8,128,1,float16,fp8,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,8,128,1,fp8,fp8,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,1,128,1,float16,float16,0,0.2089776039123535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,1,128,1,float16,fp8,0,0.1976207971572876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,1,128,1,fp8,fp8,0,0.1977776050567627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,2,128,1,float16,float16,0,0.20878880023956298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,2,128,1,float16,fp8,0,0.19819200038909912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,8,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,2,128,1,fp8,fp8,0,0.19912960529327392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,4,128,1,float16,float16,0,0.20761919021606445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,4,128,1,float16,fp8,0,0.19910240173339844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,4,128,1,fp8,fp8,0,0.19704320430755615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,8,128,1,float16,float16,0,0.21229441165924073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,4,128,1,float16,float16,0,0.01071999967098236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,8,128,1,float16,fp8,0,0.19737279415130615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,4,128,1,fp8,fp8,0,0.010707200318574906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,40,128,1,float16,float16,0,0.120632004737854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,40,128,1,float16,fp8,0,0.11208479404449463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,40,128,1,fp8,fp8,0,0.11290880441665649
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,1,128,1,float16,float16,0,0.10777440071105956
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,1,128,1,float16,fp8,0,0.10287679433822632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,1,128,1,fp8,fp8,0,0.1032480001449585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,2,128,1,float16,float16,0,0.10773119926452637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,2,128,1,float16,fp8,0,0.10284320116043091
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,2,128,1,fp8,fp8,0,0.10315519571304321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,4,128,1,float16,float16,0,0.10897760391235352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,4,128,1,float16,fp8,0,0.10296319723129273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,4,128,1,fp8,fp8,0,0.1030784010887146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,8,128,1,float16,float16,0,0.10938400030136108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,8,128,1,fp8,fp8,0,0.1034351944923401
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,40,128,1,float16,float16,0,0.06244000196456909
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,40,128,1,float16,fp8,0,0.05988479852676391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,40,128,1,fp8,fp8,0,0.05992159843444824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,1,128,1,float16,float16,0,0.05981760025024414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,1,128,1,float16,fp8,0,0.05570080280303955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,1,128,1,fp8,fp8,0,0.055726397037506106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,2,128,1,float16,float16,0,0.05999199748039245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,2,128,1,float16,fp8,0,0.05571200251579285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,2,128,1,fp8,fp8,0,0.055899202823638916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,8,128,1,fp8,fp8,0,0.19910080432891847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,4,128,1,float16,float16,0,0.060078400373458865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,4,128,1,float16,fp8,0,0.055739200115203856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,4,128,1,fp8,fp8,0,0.05574079751968384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,8,128,1,float16,float16,0,0.06085280179977417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,8,128,1,float16,fp8,0,0.05582399964332581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,8,128,1,fp8,fp8,0,0.0557807981967926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,40,128,1,float16,float16,0,0.03700000047683716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,40,128,1,float16,fp8,0,0.03504799902439117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,1,128,1,float16,float16,0,0.03513120114803314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,8,128,1,float16,fp8,0,0.10278559923171997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,1,128,1,fp8,fp8,0,0.03303999900817871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,2,128,1,float16,float16,0,0.035067200660705566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,2,128,1,float16,fp8,0,0.03315840065479279
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,2,128,1,fp8,fp8,0,0.0331743985414505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,4,128,1,float16,float16,0,0.035087999701499936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,4,128,1,float16,fp8,0,0.03303999900817871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,4,128,1,fp8,fp8,0,0.033092799782752993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,8,128,1,float16,float16,0,0.03506399989128113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,8,128,1,float16,fp8,0,0.03307999968528748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,40,128,1,float16,float16,0,0.02284799963235855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,8,128,1,fp8,fp8,0,0.033318400382995605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,40,128,1,float16,fp8,0,0.022870400547981264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,40,128,1,fp8,fp8,0,0.023056000471115112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,1,128,1,float16,float16,0,0.022776000201702118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,1,128,1,float16,fp8,0,0.021134400367736818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,1,128,1,fp8,fp8,0,0.020929600298404693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,2,128,1,float16,float16,0,0.022969600558280946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,2,128,1,float16,fp8,0,0.02082560062408447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,40,128,1,fp8,fp8,0,0.03519839942455292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,4,128,1,float16,float16,0,0.022703999280929567
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,4,128,1,float16,fp8,0,0.020849600434303284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,4,128,1,fp8,fp8,0,0.02080959975719452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,8,128,1,float16,float16,0,0.022784000635147093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,8,128,1,float16,fp8,0,0.020745599269866945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,8,128,1,fp8,fp8,0,0.02194399982690811
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,40,128,1,float16,float16,0,0.01671680063009262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,40,128,1,float16,fp8,0,0.016731199622154237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,40,128,1,fp8,fp8,0,0.0166703999042511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,1,128,1,float16,float16,0,0.01653439998626709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,1,128,1,float16,fp8,0,0.014846399426460266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,1,128,1,fp8,fp8,0,0.016568000614643096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,2,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,2,128,1,float16,fp8,0,0.01666080057621002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,2,128,1,fp8,fp8,0,0.016630400717258454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,4,128,1,float16,float16,0,0.016497600078582763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,4,128,1,float16,fp8,0,0.016596800088882445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,4,128,1,fp8,fp8,0,0.016734400391578676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,8,128,1,float16,float16,0,0.016663999855518342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,8,128,1,float16,fp8,0,0.01666080057621002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,8,128,1,fp8,fp8,0,0.01663520038127899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,40,128,1,float16,float16,0,0.014539200067520141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,40,128,1,float16,fp8,0,0.012647999823093415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,40,128,1,fp8,fp8,0,0.012761600315570831
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,1,128,1,float16,float16,0,0.012608000636100769
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,1,128,1,float16,fp8,0,0.012775999307632447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,1,128,1,fp8,fp8,0,0.012595200538635254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,2,128,1,float16,float16,0,0.01356479972600937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,2,128,1,float16,fp8,0,0.01252640038728714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,2,128,1,fp8,fp8,0,0.012676799297332763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,4,128,1,float16,float16,0,0.012585599720478059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,4,128,1,float16,fp8,0,0.01268640011548996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,4,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,8,128,1,float16,float16,0,0.013841600716114044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,8,128,1,float16,fp8,0,0.012638400495052337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,8,128,1,fp8,fp8,0,0.012625600397586822
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,40,128,1,float16,float16,0,0.010606399923563003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,40,128,1,float16,fp8,0,0.010686399787664414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,40,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,1,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,1,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,1,128,1,fp8,fp8,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,2,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,2,128,1,fp8,fp8,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,4,128,1,float16,float16,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,4,128,1,float16,fp8,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,4,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,8,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,8,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,8,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,40,128,1,float16,float16,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,40,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,40,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,1,128,1,float16,float16,0,0.010583999752998351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,1,128,1,float16,fp8,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,1,128,1,fp8,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,2,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,2,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,2,128,1,fp8,fp8,0,0.020924800634384157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,2,128,1,fp8,fp8,0,0.010527999699115753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,4,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,4,128,1,float16,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,4,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,8,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,8,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,2,128,1,float16,fp8,0,0.010664000362157821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,1,128,1,float16,float16,0,0.20396959781646729
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,1,128,1,float16,fp8,0,0.1906175971031189
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,1,128,1,fp8,fp8,0,0.19113600254058838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,2,128,1,float16,float16,0,0.2048975944519043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,2,128,1,float16,fp8,0,0.19105759859085084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,2,128,1,fp8,fp8,0,0.1912511944770813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,4,128,1,float16,float16,0,0.20485599040985109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,1,128,1,float16,fp8,0,0.033048000931739804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,4,128,1,float16,fp8,0,0.1909119963645935
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,4,128,1,fp8,fp8,0,0.18891199827194213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,8,128,1,float16,float16,0,0.20490720272064208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,8,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,8,128,1,fp8,fp8,0,0.19107520580291748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,0,0.1007856011390686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,40,128,1,fp8,fp8,0,0.10088959932327271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,1,128,1,float16,float16,0,0.10713920593261719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,1,128,1,float16,fp8,0,0.10081759691238404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,1,128,1,fp8,fp8,0,0.10077279806137085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,2,128,1,float16,float16,0,0.10705759525299072
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,2,128,1,float16,fp8,0,0.10094879865646363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,2,128,1,fp8,fp8,0,0.10076160430908203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,4,128,1,float16,float16,0,0.10709760189056397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,4,128,1,float16,fp8,0,0.09922879934310913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,4,128,1,fp8,fp8,0,0.10064480304718018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,8,128,1,float16,fp8,0,0.1888576030731201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,8,128,1,float16,fp8,0,0.10081119537353515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,8,128,1,float16,float16,0,0.10731359720230102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,8,128,1,fp8,fp8,0,0.10069119930267334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,0,0.10701440572738648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,0,0.059988802671432494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,40,128,1,fp8,fp8,0,0.055580800771713255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,1,128,1,float16,float16,0,0.05777119994163513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,1,128,1,float16,fp8,0,0.05545120239257813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,2,128,1,float16,float16,0,0.057792001962661745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,2,128,1,float16,fp8,0,0.05501919984817505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,2,128,1,fp8,fp8,0,0.05382080078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,4,128,1,float16,float16,0,0.059755200147628786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,4,128,1,float16,fp8,0,0.05396159887313843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,4,128,1,fp8,fp8,0,0.054872000217437746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,8,128,1,float16,fp8,0,0.05548959970474243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,8,128,1,fp8,fp8,0,0.05569919943809509
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,0,0.03524320125579834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,0,0.033000001311302186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,40,128,1,fp8,fp8,0,0.033030399680137636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,1,128,1,float16,float16,0,0.03514719903469086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,1,128,1,float16,fp8,0,0.03295679986476898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,1,128,1,fp8,fp8,0,0.033032000064849854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,2,128,1,float16,float16,0,0.03531680107116699
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,2,128,1,float16,fp8,0,0.032971200346946714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,2,128,1,fp8,fp8,0,0.032969599962234496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,4,128,1,float16,float16,0,0.0350735992193222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,4,128,1,float16,fp8,0,0.032969599962234496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,1,128,1,fp8,fp8,0,0.0546176016330719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,4,128,1,fp8,fp8,0,0.03310559988021851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,8,128,1,float16,float16,0,0.03507519960403442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,8,128,1,float16,fp8,0,0.03294239938259125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,8,128,1,fp8,fp8,0,0.03246400058269501
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,0,0.022907200455665588
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,0,0.020796799659729005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,1,128,1,float16,float16,0,0.022681599855422972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,1,128,1,float16,fp8,0,0.020883199572563172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,1,128,1,fp8,fp8,0,0.020854400098323823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,2,128,1,float16,float16,0,0.022841599583625794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,2,128,1,float16,fp8,0,0.02099040001630783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,2,128,1,fp8,fp8,0,0.020732800662517547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,4,128,1,float16,float16,0,0.022710399329662324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,4,128,1,float16,fp8,0,0.02093279957771301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,4,128,1,fp8,fp8,0,0.020827199518680572
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,8,128,1,float16,float16,0,0.02269279956817627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,8,128,1,float16,fp8,0,0.02101600021123886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,8,128,1,fp8,fp8,0,0.02083519995212555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,0,0.016734400391578676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,0,0.016599999368190767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,40,128,1,fp8,fp8,0,0.016161599755287172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,1,128,1,float16,float16,0,0.01652639955282211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,1,128,1,float16,fp8,0,0.014891199767589569
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,1,128,1,fp8,fp8,0,0.014975999295711518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,0,0.05419039726257324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,2,128,1,float16,fp8,0,0.014660799503326416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,2,128,1,fp8,fp8,0,0.01459999978542328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,4,128,1,float16,float16,0,0.015992000699043274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,4,128,1,float16,fp8,0,0.014907200634479523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,8,128,1,float16,float16,0,0.059299200773239136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,4,128,1,fp8,fp8,0,0.0151296004652977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,8,128,1,float16,fp8,0,0.015750400722026825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,8,128,1,fp8,fp8,0,0.015547199547290802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,0,0.014595200121402741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,0,0.012558400630950928
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,40,128,1,fp8,fp8,0,0.01265760064125061
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,1,128,1,float16,float16,0,0.012580800056457519
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,1,128,1,float16,fp8,0,0.01252640038728714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,1,128,1,fp8,fp8,0,0.012651200592517852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,2,128,1,float16,float16,0,0.012580800056457519
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,2,128,1,float16,fp8,0,0.012566399574279786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,2,128,1,fp8,fp8,0,0.012583999335765839
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,4,128,1,float16,float16,0,0.012638400495052337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,4,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,2,128,1,float16,float16,0,0.01664319932460785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,4,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,8,128,1,float16,float16,0,0.012625600397586822
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,8,128,1,float16,fp8,0,0.012569600343704223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,8,128,1,fp8,fp8,0,0.012603199481964112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,0,0.011284799873828888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,40,128,1,fp8,fp8,0,0.0208624005317688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,8,128,1,float16,float16,0,0.016628800332546233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,40,128,1,fp8,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,1,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,1,128,1,fp8,fp8,0,0.010604800283908844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,1,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,2,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,2,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,2,128,1,float16,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,4,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,4,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,8,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,8,128,1,float16,fp8,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,8,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,40,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,1,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,1,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,1,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,2,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,2,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,2,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,4,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,4,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,4,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,8,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,8,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,8,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,4,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,0,0.012363199889659882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,1,128,1,fp8,fp8,0,11.010052490234376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,fp8,0,11.961284637451172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,float16,0,18.13036346435547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,float16,0,18.708004760742188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,fp8,0,11.262078094482423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,2,128,1,fp8,fp8,0,12.45272445678711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,4,128,1,fp8,fp8,0,12.303699493408203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,fp8,0,13.694985961914062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,float16,0,20.248362731933593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,float16,0,21.3197021484375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,fp8,0,11.961991882324218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,8,128,1,fp8,fp8,0,12.795673370361328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,fp8,0,5.980279922485352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,32,128,1,fp8,fp8,0,5.856095886230468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,float16,0,10.206531524658203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,fp8,0,5.836732864379883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,1,128,1,fp8,fp8,0,5.5781505584716795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,fp8,0,5.777385711669922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,2,128,1,fp8,fp8,0,5.655331039428711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,float16,0,10.89169921875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,fp8,0,5.772769546508789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,float16,0,10.479283142089844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,4,128,1,fp8,fp8,0,6.014366531372071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,8,128,1,fp8,fp8,0,6.018385696411133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,fp8,0,6.346435165405273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,float16,0,10.306983947753906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,fp8,0,2.9204736709594727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,32,128,1,fp8,fp8,0,3.271257781982422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,float16,0,5.074225616455078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,fp8,0,2.780473518371582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,1,128,1,fp8,fp8,0,2.7808111190795897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,float16,0,4.149118423461914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,fp8,0,2.7707887649536134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,2,128,1,fp8,fp8,0,2.95831356048584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,float16,0,4.817233657836914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,fp8,0,2.755308723449707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,4,128,1,fp8,fp8,0,2.9811391830444336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,float16,0,3.907459259033203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,fp8,0,2.8892143249511717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,8,128,1,fp8,fp8,0,2.782542419433594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,fp8,0,1.5035776138305663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,32,128,1,fp8,fp8,0,1.4802224159240722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,float16,0,1.7035375595092774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,fp8,0,1.5543055534362793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,1,128,1,fp8,fp8,0,1.4184399604797364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,float16,0,1.779876708984375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,fp8,0,1.568830394744873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,2,128,1,fp8,fp8,0,1.4007856369018554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,float16,0,1.6152799606323243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,fp8,0,1.911372756958008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,4,128,1,fp8,fp8,0,1.4228079795837403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,float16,0,1.829747200012207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,fp8,0,1.4924863815307616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,8,128,1,fp8,fp8,0,1.4171296119689942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,float16,0,10.707206726074219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,fp8,0,6.749492645263672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,1,128,1,fp8,fp8,0,6.814769744873047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,float16,0,10.902649688720704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,fp8,0,6.689537811279297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,2,128,1,fp8,fp8,0,6.828227233886719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,float16,0,11.316819000244141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,fp8,0,6.866889953613281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,4,128,1,fp8,fp8,0,6.6366111755371096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,float16,0,1.641059112548828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,float16,0,11.297966766357423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,float16,0,3.2983776092529298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,float16,0,8.964201354980469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,fp8,0,6.857718658447266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,float16,0,5.079033660888672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,fp8,0,3.3427200317382812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,8,128,1,fp8,fp8,0,7.056715393066407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,32,128,1,fp8,fp8,0,3.4893550872802734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,fp8,0,3.2613857269287108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,1,128,1,fp8,fp8,0,3.072769546508789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,float16,0,5.018455886840821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,fp8,0,3.1516223907470704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,2,128,1,fp8,fp8,0,3.283577728271484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,float16,0,5.43016471862793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,float16,0,5.5805408477783205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,fp8,0,3.3993873596191406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,4,128,1,fp8,fp8,0,3.1613088607788087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,float16,0,2.4368911743164063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,8,128,1,fp8,fp8,0,3.3839759826660156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,fp8,0,3.5937599182128905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,32,128,1,fp8,fp8,0,1.6792448043823243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,float16,0,2.1274959564208986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,fp8,0,2.5423376083374025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,fp8,0,1.6854480743408202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,float16,0,5.580987167358399
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,1,128,1,fp8,fp8,0,1.8871488571166992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,float16,0,1.8492111206054687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,2,128,1,fp8,fp8,0,1.569660758972168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,fp8,0,1.7146528244018555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,float16,0,1.968324851989746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,fp8,0,2.1033807754516602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,4,128,1,fp8,fp8,0,1.6112783432006836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,float16,0,1.9991167068481446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,fp8,0,1.7015792846679687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,float16,0,1.0260623931884765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,8,128,1,fp8,fp8,0,1.7814191818237304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,fp8,0,1.2766592025756835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,32,128,1,fp8,fp8,0,0.8659119606018066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,float16,0,0.9675392150878906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,fp8,0,0.9350399971008301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,1,128,1,fp8,fp8,0,0.9784000396728516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,float16,0,0.9414480209350586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,fp8,0,0.8933775901794434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,2,128,1,fp8,fp8,0,0.8485343933105469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,float16,0,0.959175968170166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,fp8,0,0.8578304290771485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,4,128,1,fp8,fp8,0,0.8548959732055664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,float16,0,0.9862192153930665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,fp8,0,0.9900879859924316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,8,128,1,fp8,fp8,0,0.838163185119629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,fp8,0,4.815476989746093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,1,128,1,fp8,fp8,0,5.154201507568359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,float16,0,7.146871948242188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,float16,0,7.242121887207031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,fp8,0,4.995145416259765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,2,128,1,fp8,fp8,0,5.132164764404297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,fp8,0,4.812529754638672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,float16,0,7.4092658996582035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,4,128,1,fp8,fp8,0,4.979931259155274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,fp8,0,4.8490447998046875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,float16,0,2.9324319839477537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,float16,0,8.708545684814453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,8,128,1,fp8,fp8,0,5.052507019042968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,fp8,0,2.4892671585083006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,fp8,0,2.292633628845215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,32,128,1,fp8,fp8,0,2.906452751159668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,1,128,1,fp8,fp8,0,2.226540756225586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,float16,0,4.134695816040039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,2,128,1,fp8,fp8,0,2.3646896362304686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,fp8,0,2.512718391418457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,float16,0,4.004601669311524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,fp8,0,2.276203155517578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,4,128,1,fp8,fp8,0,2.3431983947753907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,float16,0,3.239120101928711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,fp8,0,2.270841598510742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,8,128,1,fp8,fp8,0,2.278788757324219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,float16,0,1.3727359771728516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,fp8,0,1.2238688468933105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,float16,0,4.260663986206055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,32,128,1,fp8,fp8,0,1.4402607917785644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,float16,0,1.405907154083252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,fp8,0,1.1280816078186036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,1,128,1,fp8,fp8,0,1.4202704429626465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,float16,0,1.3115792274475098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,fp8,0,1.2466976165771484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,2,128,1,fp8,fp8,0,1.1185647964477539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,fp8,0,1.2429072380065918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,float16,0,1.420248031616211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,float16,0,1.2854559898376465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,fp8,0,1.2892191886901856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,8,128,1,fp8,fp8,0,1.4309760093688966
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,fp8,0,0.647211217880249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,32,128,1,fp8,fp8,0,0.7519120216369629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,float16,0,0.7069231986999511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,4,128,1,fp8,fp8,0,1.1460111618041993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,1,128,1,fp8,fp8,0,0.631276798248291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,float16,0,0.7018943786621094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,fp8,0,0.6220448017120361
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,float16,0,0.8820544242858886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,2,128,1,fp8,fp8,0,0.6908207893371582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,float16,0,0.7102880001068115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,fp8,0,0.6053071975708008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,4,128,1,fp8,fp8,0,0.6177472114562989
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,float16,0,0.692742395401001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,fp8,0,0.6110720157623291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,fp8,0,0.616099214553833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,8,128,1,fp8,fp8,0,0.6055632114410401
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,1,128,1,fp8,fp8,0,5.989409637451172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,fp8,0,6.271257781982422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,float16,0,9.71890869140625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,fp8,0,5.983438491821289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,2,128,1,fp8,fp8,0,6.588358306884766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,float16,0,9.454662322998047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,fp8,0,6.020651245117188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,float16,0,10.567742156982423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,4,128,1,fp8,fp8,0,6.809590148925781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,float16,0,9.904347229003907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,fp8,0,6.78116455078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,float16,0,5.678446578979492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,8,128,1,fp8,fp8,0,7.212363433837891
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,fp8,0,3.1472543716430663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,32,128,1,fp8,fp8,0,3.2773502349853514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,fp8,0,3.3180622100830077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,1,128,1,fp8,fp8,0,3.2028224945068358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,float16,0,5.517910385131836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,float16,0,4.4037727355957035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,fp8,0,3.279169464111328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,2,128,1,fp8,fp8,0,3.2493648529052734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,fp8,0,2.9752223968505858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,4,128,1,fp8,fp8,0,3.0546640396118163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,float16,0,5.530137634277343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,float16,0,4.73807373046875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,8,128,1,fp8,fp8,0,3.192531204223633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,float16,0,2.6404767990112306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,fp8,0,1.8786752700805665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,32,128,1,fp8,fp8,0,1.5885135650634765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,fp8,0,1.4566816329956054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,float16,0,2.3499887466430662
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,1,128,1,fp8,fp8,0,1.4303423881530761
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,fp8,0,3.2223838806152343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,float16,0,1.645167922973633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,2,128,1,fp8,fp8,0,1.4979632377624512
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,fp8,0,1.8020431518554687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,float16,0,1.5972784042358399
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,fp8,0,1.7107152938842773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,4,128,1,fp8,fp8,0,1.5053824424743651
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,float16,0,1.6928367614746094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,fp8,0,1.6103248596191406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,8,128,1,fp8,fp8,0,1.499396800994873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,float16,0,0.9361295700073242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,fp8,0,1.146446418762207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,32,128,1,fp8,fp8,0,0.8273615837097168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,float16,0,0.8800751686096191
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,fp8,0,0.9134960174560547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,1,128,1,fp8,fp8,0,0.7728079795837403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,fp8,0,0.7743023872375489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,2,128,1,fp8,fp8,0,0.7603312015533448
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,float16,0,0.8914319992065429
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,fp8,0,0.8181695938110352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,4,128,1,fp8,fp8,0,0.7689455986022949
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,fp8,0,0.7672592163085937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,8,128,1,fp8,fp8,0,0.7699071884155273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,float16,0,0.8972720146179199
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,float16,0,0.5107024192810059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,fp8,0,0.4445968151092529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,32,128,1,fp8,fp8,0,0.45619997978210447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,float16,0,0.48254079818725587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,1,128,1,fp8,fp8,0,0.4284480094909668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,float16,0,0.8487104415893555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,float16,0,0.4856400012969971
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,fp8,0,0.4249392032623291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,2,128,1,fp8,fp8,0,0.4140624046325684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,float16,0,0.49045758247375487
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,fp8,0,0.42423357963562014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,4,128,1,fp8,fp8,0,0.4152336120605469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,fp8,0,0.4252480030059814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,float16,0,0.4901552200317383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,8,128,1,fp8,fp8,0,0.4158736228942871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,fp8,0,0.4178927898406982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,fp8,0,3.356126403808594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,1,128,1,fp8,fp8,0,3.329884719848633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,float16,0,5.638326263427734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,float16,0,4.626497650146485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,fp8,0,3.387607955932617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,2,128,1,fp8,fp8,0,3.4315345764160154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,fp8,0,3.723723220825195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,4,128,1,fp8,fp8,0,3.515433502197266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,float16,0,5.646108627319336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,float16,0,5.434819030761719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,fp8,0,3.617006301879883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,8,128,1,fp8,fp8,0,3.427451324462891
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,float16,0,3.474478530883789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,fp8,0,2.139334487915039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,32,128,1,fp8,fp8,0,1.8373023986816406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,float16,0,2.125992012023926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,1,128,1,fp8,fp8,0,1.6474319458007813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,fp8,0,2.3059024810791016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,2,128,1,fp8,fp8,0,1.653780746459961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,float16,0,2.5850879669189455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,fp8,0,2.022043228149414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,float16,0,1.9725967407226563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,fp8,0,1.749737548828125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,4,128,1,fp8,fp8,0,1.8998224258422851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,fp8,0,1.7368799209594727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,8,128,1,fp8,fp8,0,1.7833728790283203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,float16,0,1.1577152252197265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,fp8,0,0.9829327583312988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,float16,0,0.9705488204956054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,fp8,0,0.8687456130981446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,float16,0,2.894628715515137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,1,128,1,fp8,fp8,0,1.0797120094299317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,fp8,0,0.8718303680419922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,2,128,1,fp8,fp8,0,0.9810895919799805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,float16,0,0.9869680404663086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,fp8,0,0.8676560401916504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,32,128,1,fp8,fp8,0,1.3717408180236816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,4,128,1,fp8,fp8,0,0.9387552261352539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,float16,0,1.0861167907714844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,fp8,0,0.8537455558776855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,8,128,1,fp8,fp8,0,0.8778400421142578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,fp8,0,0.49893918037414553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,float16,0,0.7462384223937988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,float16,0,0.9743408203125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,float16,0,0.5155839920043945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,32,128,1,fp8,fp8,0,0.5100431919097901
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,fp8,0,0.636297607421875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,1,128,1,fp8,fp8,0,0.4690256118774414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,float16,0,0.5198592185974121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,fp8,0,0.4706592082977295
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,2,128,1,fp8,fp8,0,0.6352047920227051
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,float16,0,0.5303215980529785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,fp8,0,0.4854191780090332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,4,128,1,fp8,fp8,0,0.4689663887023926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,fp8,0,0.46813440322875977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,float16,0,0.32279040813446047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,8,128,1,fp8,fp8,0,0.4742447853088379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,fp8,0,0.2854048013687134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,float16,0,0.2862799882888794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,fp8,0,0.2630192041397095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,1,128,1,fp8,fp8,0,0.25782079696655275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,float16,0,0.29391839504241946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,fp8,0,0.25785601139068604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,2,128,1,fp8,fp8,0,0.26260159015655515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,float16,0,0.2901087999343872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,fp8,0,0.26287360191345216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,4,128,1,fp8,fp8,0,0.25817599296569826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,float16,0,0.3001456022262573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,fp8,0,0.2575263977050781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,8,128,1,fp8,fp8,0,0.2629568099975586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,float16,0,0.531113576889038
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,32,128,1,fp8,fp8,0,0.2865823984146118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,fp8,0,3.1242191314697267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,float16,0,4.733201599121093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,1,128,1,fp8,fp8,0,3.307622528076172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,float16,0,4.541643142700195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,fp8,0,3.236627197265625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,2,128,1,fp8,fp8,0,3.3170753479003907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,float16,0,5.271444702148438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,4,128,1,fp8,fp8,0,3.2098464965820312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,fp8,0,3.4862224578857424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,float16,0,4.924208068847657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,float16,0,2.2002384185791017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,fp8,0,3.5301631927490233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,8,128,1,fp8,fp8,0,3.275222396850586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,32,128,1,fp8,fp8,0,2.058646392822266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,float16,0,1.890225601196289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,fp8,0,1.7091392517089843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,1,128,1,fp8,fp8,0,1.6906976699829102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,fp8,0,1.597980785369873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,float16,0,2.7274848937988283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,fp8,0,2.0118160247802734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,float16,0,1.977987289428711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,fp8,0,1.7464208602905273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,4,128,1,fp8,fp8,0,1.5589344024658203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,float16,0,2.2956464767456053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,fp8,0,1.648873519897461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,8,128,1,fp8,fp8,0,1.5625807762145996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,2,128,1,fp8,fp8,0,1.5879072189331054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,fp8,0,1.0174176216125488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,32,128,1,fp8,fp8,0,0.959011173248291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,float16,0,1.5435791969299317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,float16,0,0.9167632102966309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,1,128,1,fp8,fp8,0,0.8109824180603027
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,fp8,0,0.9333935737609863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,fp8,0,0.8248592376708984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,float16,0,1.2549327850341796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,2,128,1,fp8,fp8,0,0.8099648475646972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,float16,0,1.0715968132019043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,fp8,0,0.8260975837707519
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,4,128,1,fp8,fp8,0,1.0093119621276856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,float16,0,0.906828784942627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,8,128,1,fp8,fp8,0,0.8149344444274902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,float16,0,0.6110752105712891
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,fp8,0,1.2611056327819825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,fp8,0,0.6720191955566406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,32,128,1,fp8,fp8,0,0.4903456211090088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,float16,0,0.5139167785644532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,fp8,0,0.513046407699585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,1,128,1,fp8,fp8,0,0.43772640228271487
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,float16,0,0.48424320220947265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,2,128,1,fp8,fp8,0,0.44187679290771487
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,float16,0,0.4760591983795166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,fp8,0,0.43473281860351565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,4,128,1,fp8,fp8,0,0.42922401428222656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,float16,0,0.49135360717773435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,fp8,0,0.43317279815673826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,float16,0,0.2944112062454224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,8,128,1,fp8,fp8,0,0.4324207782745361
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,fp8,0,0.2646944046020508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,32,128,1,fp8,fp8,0,0.30002560615539553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,float16,0,0.2613168001174927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,fp8,0,0.2378335952758789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,1,128,1,fp8,fp8,0,0.23687360286712647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,float16,0,0.2627631902694702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,fp8,0,0.2369215965270996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,2,128,1,fp8,fp8,0,0.23791038990020752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,float16,0,0.2618256092071533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,fp8,0,0.23827519416809081
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,fp8,0,0.5112127780914306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,4,128,1,fp8,fp8,0,0.2369839906692505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,float16,0,0.2695904016494751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,fp8,0,0.23965120315551758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,8,128,1,fp8,fp8,0,0.23749120235443116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,float16,0,0.17012319564819336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,fp8,0,0.15443520545959472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,32,128,1,fp8,fp8,0,0.15415680408477783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,float16,0,0.1497056007385254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,fp8,0,0.1385472059249878
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,1,128,1,fp8,fp8,0,0.13753440380096435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,float16,0,0.15016000270843505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,fp8,0,0.13818559646606446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,2,128,1,fp8,fp8,0,0.13775360584259033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,float16,0,0.1512287974357605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,fp8,0,0.1388815999031067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,4,128,1,fp8,fp8,0,0.13736000061035156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,float16,0,0.15516799688339233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,fp8,0,0.13901920318603517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,8,128,1,fp8,fp8,0,0.1366479992866516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,fp8,0,1.887139129638672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,1,128,1,fp8,fp8,0,1.868071937561035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,float16,0,2.3358192443847656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,fp8,0,1.8890880584716796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,2,128,1,fp8,fp8,0,1.9007392883300782
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,float16,0,2.0616207122802734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,float16,0,3.1981504440307615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,fp8,0,1.902164840698242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,float16,0,2.398828887939453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,fp8,0,2.1218767166137695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,8,128,1,fp8,fp8,0,1.9008527755737306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,float16,0,1.6220016479492188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,fp8,0,1.116983985900879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,32,128,1,fp8,fp8,0,1.1397904396057128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,4,128,1,fp8,fp8,0,1.8650848388671875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,fp8,0,0.9641776084899902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,float16,0,1.5552240371704102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,float16,0,1.0232527732849122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,fp8,0,0.9669440269470215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,float16,0,1.037161636352539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,2,128,1,fp8,fp8,0,1.4042559623718263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,fp8,0,0.9570560455322266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,4,128,1,fp8,fp8,0,1.1543999671936036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,float16,0,1.0928208351135253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,fp8,0,1.0233967781066895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,1,128,1,fp8,fp8,0,0.9721743583679199
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,8,128,1,fp8,fp8,0,0.9812288284301758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,float16,0,0.8386752128601074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,32,128,1,fp8,fp8,0,0.5844592094421387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,float16,0,0.5399936199188232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,fp8,0,0.666815996170044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,1,128,1,fp8,fp8,0,0.5079808235168457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,float16,0,0.569817590713501
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,fp8,0,0.510038423538208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,2,128,1,fp8,fp8,0,0.5079040050506591
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,float16,0,0.5821951866149903
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,fp8,0,0.5086512088775634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,4,128,1,fp8,fp8,0,0.5073775768280029
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,float16,0,0.5733119964599609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,fp8,0,0.505515193939209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,8,128,1,fp8,fp8,0,0.5057536125183105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,float16,0,0.34572319984436034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,32,128,1,fp8,fp8,0,0.3125744104385376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,float16,0,0.29528160095214845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,fp8,0,0.6018608093261719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,fp8,0,0.27361600399017333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,1,128,1,fp8,fp8,0,0.2797631978988647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,float16,0,0.295033597946167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,fp8,0,0.2729088068008423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,2,128,1,fp8,fp8,0,0.27197120189666746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,float16,0,0.29707839488983157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,fp8,0,0.27192800045013427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,4,128,1,fp8,fp8,0,0.2701280117034912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,float16,0,0.30394558906555175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,fp8,0,0.2710400104522705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,8,128,1,fp8,fp8,0,0.26917119026184083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,float16,0,0.19088159799575805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,fp8,0,0.17320159673690796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,32,128,1,fp8,fp8,0,0.17341599464416504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,float16,0,0.1624127984046936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,fp8,0,0.15000159740448
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,1,128,1,fp8,fp8,0,0.1509343981742859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,float16,0,0.16288000345230103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,fp8,0,0.15023519992828369
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,2,128,1,fp8,fp8,0,0.14964159727096557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,float16,0,0.16804319620132446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,fp8,0,0.14973759651184082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,4,128,1,fp8,fp8,0,0.15130879878997802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,float16,0,0.17077759504318238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,fp8,0,0.15073280334472655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,8,128,1,fp8,fp8,0,0.1504591941833496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,float16,0,0.11283040046691895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,fp8,0,0.10012160539627075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,fp8,0,0.3140239953994751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,32,128,1,fp8,fp8,0,0.10075680017471314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,float16,0,0.0961184024810791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,1,128,1,fp8,fp8,0,0.08983359932899475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,fp8,0,0.09148960113525391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,float16,0,0.09600480198860169
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,fp8,0,0.09051679968833923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,2,128,1,fp8,fp8,0,0.09065600037574768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,float16,0,0.09657440185546876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,fp8,0,0.08982399702072144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,4,128,1,fp8,fp8,0,0.09032639861106873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,float16,0,0.09857439994812012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,fp8,0,0.08969119787216187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,8,128,1,fp8,fp8,0,0.09041759967803956
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,fp8,0,1.8939504623413086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,float16,0,2.2765680313110352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,float16,0,2.2696207046508787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,fp8,0,1.927774429321289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,1,128,1,fp8,fp8,0,1.8860864639282227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,2,128,1,fp8,fp8,0,1.8678415298461915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,fp8,0,1.9085712432861328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,float16,0,2.3911312103271483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,4,128,1,fp8,fp8,0,1.8785232543945312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,float16,0,2.486627197265625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,fp8,0,1.9516111373901368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,8,128,1,fp8,fp8,0,1.9500192642211913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,float16,0,1.369051170349121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,32,128,1,fp8,fp8,0,1.221059226989746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,fp8,0,1.6062128067016601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,float16,0,1.0284751892089843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,fp8,0,1.216926383972168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,1,128,1,fp8,fp8,0,0.9758319854736328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,float16,0,1.0193360328674317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,fp8,0,1.358176040649414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,2,128,1,fp8,fp8,0,0.9641839981079101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,float16,0,1.0237360000610352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,fp8,0,1.1406288146972656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,4,128,1,fp8,fp8,0,0.9641375541687012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,float16,0,1.0833663940429688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,fp8,0,1.136300754547119
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,float16,0,0.6745520114898682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,8,128,1,fp8,fp8,0,0.9911791801452636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,float16,0,0.5336495876312256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,fp8,0,0.496995210647583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,1,128,1,fp8,fp8,0,0.5067808151245117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,float16,0,0.5187327861785889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,fp8,0,0.5012815952301025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,2,128,1,fp8,fp8,0,0.49396958351135256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,float16,0,0.5401279926300049
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,fp8,0,0.49233121871948243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,4,128,1,fp8,fp8,0,0.49977598190307615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,float16,0,0.5449999809265137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,fp8,0,0.49678878784179686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,8,128,1,fp8,fp8,0,0.5498223781585694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,fp8,0,0.8795887947082519
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,float16,0,0.35124959945678713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,32,128,1,fp8,fp8,0,0.6542992115020752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,fp8,0,0.34452319145202637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,32,128,1,fp8,fp8,0,0.3216592073440552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,1,128,1,fp8,fp8,0,0.2624703884124756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,float16,0,0.28337280750274657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,2,128,1,fp8,fp8,0,0.2672863960266113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,float16,0,0.2878416061401367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,fp8,0,0.2659600019454956
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,4,128,1,fp8,fp8,0,0.26562559604644775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,float16,0,0.2989743947982788
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,fp8,0,0.26502718925476076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,8,128,1,fp8,fp8,0,0.2647279977798462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,float16,0,0.2809936046600342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,float16,0,0.19318560361862183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,fp8,0,0.1750607967376709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,fp8,0,0.26244640350341797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,32,128,1,fp8,fp8,0,0.17719039916992188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,fp8,0,0.14203840494155884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,1,128,1,fp8,fp8,0,0.14583519697189332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,float16,0,0.15338720083236695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,fp8,0,0.14541759490966796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,2,128,1,fp8,fp8,0,0.14309760332107543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,fp8,0,0.14327839612960816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,float16,0,0.1598688006401062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,4,128,1,fp8,fp8,0,0.14275039434432985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,float16,0,0.16416159868240357
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,fp8,0,0.14324159622192384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,8,128,1,fp8,fp8,0,0.14642720222473143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,float16,0,0.1059391975402832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,fp8,0,0.09864799976348877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,32,128,1,fp8,fp8,0,0.09698399901390076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,float16,0,0.08887519836425781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,1,128,1,fp8,fp8,0,0.08313279747962951
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,fp8,0,0.26508638858795164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,float16,0,0.08739839792251587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,fp8,0,0.08241119980812073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,float16,0,0.15612800121307374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,2,128,1,fp8,fp8,0,0.08215839862823486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,float16,0,0.08871200084686279
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,fp8,0,0.08225759863853455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,4,128,1,fp8,fp8,0,0.08241919875144958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,float16,0,0.09127519726753235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,fp8,0,0.08254560232162475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,8,128,1,fp8,fp8,0,0.08227840065956116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,float16,0,0.06386079788208007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,fp8,0,0.059617602825164796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,32,128,1,fp8,fp8,0,0.05983520150184631
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,float16,0,0.055580800771713255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,fp8,0,0.05347520112991333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,1,128,1,fp8,fp8,0,0.05337280035018921
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,float16,0,0.05586400032043457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,fp8,0,0.05321279764175415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,2,128,1,fp8,fp8,0,0.05353279709815979
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,float16,0,0.05601279735565186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,fp8,0,0.05349439978599548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,4,128,1,fp8,fp8,0,0.05346879959106445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,float16,0,0.057608002424240114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,fp8,0,0.05356159806251526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,8,128,1,fp8,fp8,0,0.053518402576446536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,fp8,0,0.0820527970790863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,float16,0,1.2282032012939452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,fp8,0,1.197862434387207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,1,128,1,fp8,fp8,0,1.1856320381164551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,fp8,0,1.1939488410949708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,2,128,1,fp8,fp8,0,1.1826448440551758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,float16,0,1.6080495834350585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,fp8,0,1.1800127983093263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,float16,0,1.3359248161315918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,4,128,1,fp8,fp8,0,1.3110896110534669
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,float16,0,1.3318047523498535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,fp8,0,1.179316806793213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,float16,0,0.8589920043945313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,8,128,1,fp8,fp8,0,1.3646944046020508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,fp8,0,0.7771008014678955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,fp8,0,0.6216815948486328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,1,128,1,fp8,fp8,0,0.6213871955871582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,float16,0,0.6362304210662841
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,fp8,0,0.6180352210998535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,2,128,1,fp8,fp8,0,0.6143983840942383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,float16,0,0.6590384006500244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,fp8,0,0.6159135818481445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,4,128,1,fp8,fp8,0,0.6207551956176758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,float16,0,0.6798031806945801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,float16,0,0.7080416202545166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,8,128,1,fp8,fp8,0,0.6118015766143798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,fp8,0,0.6131648063659668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,float16,0,0.43835201263427737
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,fp8,0,0.4010047912597656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,float16,0,0.33027360439300535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,fp8,0,0.32223680019378664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,1,128,1,fp8,fp8,0,0.3200176000595093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,float16,0,0.33181440830230713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,fp8,0,0.3189296007156372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,2,128,1,fp8,fp8,0,0.32162559032440186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,float16,0,0.3357232093811035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,fp8,0,0.3202816009521484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,4,128,1,fp8,fp8,0,0.3183552026748657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,float16,0,0.3549983978271484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,fp8,0,0.3157952070236206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,float16,0,0.23113760948181153
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,8,128,1,fp8,fp8,0,0.3187936067581177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,fp8,0,0.21150879859924315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,32,128,1,fp8,fp8,0,0.21346399784088135
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,float16,0,0.17752000093460082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,fp8,0,0.17249759435653686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,1,128,1,fp8,fp8,0,0.17041280269622802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,float16,0,0.18066400289535522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,32,128,1,fp8,fp8,0,0.4005727767944336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,fp8,0,0.17033439874649048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,2,128,1,fp8,fp8,0,0.17257440090179443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,float16,0,0.18511519432067872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,4,128,1,fp8,fp8,0,0.1719040036201477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,fp8,0,0.1740383982658386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,8,128,1,fp8,fp8,0,0.17098720073699952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,float16,0,0.12710720300674438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,fp8,0,0.11696159839630127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,32,128,1,fp8,fp8,0,0.11723200082778931
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,float16,0,0.09905440211296082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,fp8,0,0.09411200284957885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,1,128,1,fp8,fp8,0,0.0942031979560852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,32,128,1,fp8,fp8,0,0.7720223903656006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,float16,0,0.09968799948692322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,fp8,0,0.09366559982299805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,2,128,1,fp8,fp8,0,0.09437119960784912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,float16,0,0.10115679502487182
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,fp8,0,0.09426079988479615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,4,128,1,fp8,fp8,0,0.09365760087966919
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,float16,0,0.10704799890518188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,fp8,0,0.09372159838676453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,8,128,1,fp8,fp8,0,0.0943888008594513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,float16,0,0.07417119741439819
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,fp8,0,0.06783199906349183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,32,128,1,fp8,fp8,0,0.06782559752464294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,float16,0,0.05974239706993103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,fp8,0,0.17017279863357543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,fp8,0,0.05779520273208618
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,1,128,1,fp8,fp8,0,0.056681597232818605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,float16,0,0.059708797931671144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,float16,0,0.19394079446792603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,2,128,1,fp8,fp8,0,0.05766080021858215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,float16,0,0.06215360164642334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,fp8,0,0.0576304018497467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,float16,0,0.06359040141105651
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,fp8,0,0.05780159831047058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,8,128,1,fp8,fp8,0,0.057822400331497194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,float16,0,0.04378879964351654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,fp8,0,0.0434112012386322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,32,128,1,fp8,fp8,0,0.04328800141811371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,float16,0,0.04087840020656586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,fp8,0,0.03916960060596466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,1,128,1,fp8,fp8,0,0.03917759954929352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,float16,0,0.039684799313545224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,fp8,0,0.03841120004653931
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,2,128,1,fp8,fp8,0,0.03856959939002991
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,float16,0,0.0403439998626709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,fp8,0,0.03924959897994995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,4,128,1,fp8,fp8,0,0.03912639915943146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,fp8,0,0.05627520084381103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,float16,0,0.04128639996051788
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,fp8,0,0.0372191995382309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,8,128,1,fp8,fp8,0,0.03920960128307342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,4,128,1,fp8,fp8,0,0.05960000157356262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,float16,0,1.2629152297973634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,fp8,0,1.2792112350463867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,1,128,1,fp8,fp8,0,1.277947235107422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,float16,0,1.293064022064209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,2,128,1,fp8,fp8,0,1.2753999710083008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,float16,0,1.3272000312805177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,4,128,1,fp8,fp8,0,1.2729007720947265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,float16,0,1.529856014251709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,fp8,0,1.268665599822998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,fp8,0,1.401740837097168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,8,128,1,fp8,fp8,0,1.6087152481079101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,float16,0,0.9641632080078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,fp8,0,0.8586079597473144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,float16,0,0.6776624202728272
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,fp8,0,1.3269904136657715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,32,128,1,fp8,fp8,0,0.9017104148864746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,fp8,0,0.6499663829803467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,1,128,1,fp8,fp8,0,0.6585552215576171
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,float16,0,0.6427231788635254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,fp8,0,0.6535007953643799
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,2,128,1,fp8,fp8,0,0.6473184108734131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,float16,0,0.6768928050994873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,fp8,0,0.6425536155700684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,4,128,1,fp8,fp8,0,0.651145601272583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,float16,0,0.7055712223052979
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,float16,0,0.4922783851623535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,8,128,1,fp8,fp8,0,0.640496015548706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,fp8,0,0.4958816051483154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,float16,0,0.33677599430084226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,32,128,1,fp8,fp8,0,0.4471744060516357
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,fp8,0,0.381715202331543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,1,128,1,fp8,fp8,0,0.3361119985580444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,float16,0,0.34311039447784425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,fp8,0,0.3369488000869751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,2,128,1,fp8,fp8,0,0.3351792097091675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,float16,0,0.3522063970565796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,fp8,0,0.3351408004760742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,4,128,1,fp8,fp8,0,0.33408479690551757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,fp8,0,0.3327023983001709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,float16,0,0.37052481174468993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,8,128,1,fp8,fp8,0,0.33346080780029297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,float16,0,0.2523535966873169
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,fp8,0,0.23420159816741942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,float16,0,0.17985279560089112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,32,128,1,fp8,fp8,0,0.23296000957489013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,fp8,0,0.18015520572662352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,1,128,1,fp8,fp8,0,0.17837599515914918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,float16,0,0.17984800338745116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,fp8,0,0.1801136016845703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,2,128,1,fp8,fp8,0,0.178436803817749
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,fp8,0,0.7598048210144043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,fp8,0,0.17976479530334472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,4,128,1,fp8,fp8,0,0.177183997631073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,float16,0,0.19735840559005738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,8,128,1,fp8,fp8,0,0.17692320346832274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,float16,0,0.13672159910202025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,fp8,0,0.1249951958656311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,32,128,1,fp8,fp8,0,0.12683839797973634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,float16,0,0.0987775981426239
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,fp8,0,0.09667040109634399
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,1,128,1,fp8,fp8,0,0.09492480158805847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,float16,0,0.10128799676895142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,fp8,0,0.09488480091094971
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,2,128,1,fp8,fp8,0,0.09673119783401489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,float16,0,0.1015056014060974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,fp8,0,0.0984607994556427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,4,128,1,fp8,fp8,0,0.09609280228614807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,float16,0,0.10816160440444947
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,fp8,0,0.09646720290184022
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,8,128,1,fp8,fp8,0,0.09797599911689758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,float16,0,0.07605119943618774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,fp8,0,0.07097920179367065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,32,128,1,fp8,fp8,0,0.0712719976902008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,float16,0,0.05723680257797241
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,fp8,0,0.055936002731323244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,1,128,1,fp8,fp8,0,0.05584160089492798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,float16,0,0.05703200101852417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,float16,0,0.18422399759292601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,fp8,0,0.05615519881248474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,2,128,1,fp8,fp8,0,0.05600000023841858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,float16,0,0.05860480070114136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,fp8,0,0.055873602628707886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,4,128,1,fp8,fp8,0,0.056406402587890626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,fp8,0,0.055934399366378784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,float16,0,0.06158080101013184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,8,128,1,fp8,fp8,0,0.05589119791984558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,float16,0,0.043681600689888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,fp8,0,0.043244799971580504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,32,128,1,fp8,fp8,0,0.04322560131549835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,float16,0,0.03743039965629578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,fp8,0,0.03677279949188232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,1,128,1,fp8,fp8,0,0.03654719889163971
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,float16,0,0.03739359974861145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,fp8,0,0.03700000047683716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,float16,0,0.03818880021572113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,fp8,0,0.03707360029220581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,4,128,1,fp8,fp8,0,0.037092798948287965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,float16,0,0.039468801021575926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,fp8,0,0.037041598558425905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,8,128,1,fp8,fp8,0,0.037241598963737486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,float16,0,0.02696000039577484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,fp8,0,0.026900801062583923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,32,128,1,fp8,fp8,0,0.027008000016212463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,float16,0,0.02306720018386841
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,fp8,0,0.17891680002212523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,fp8,0,0.022961600124835967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,float16,0,0.023259200155735016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,fp8,0,0.023030400276184082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,2,128,1,fp8,fp8,0,0.02299039959907532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,float16,0,0.024710400402545928
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,4,128,1,fp8,fp8,0,0.023710399866104126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,fp8,0,0.023473599553108217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,float16,0,0.024884800612926482
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,fp8,0,0.023211200535297394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,2,128,1,fp8,fp8,0,0.03687199950218201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,1,128,1,float16,float16,0,0.9258527755737305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,1,128,1,float16,fp8,0,0.9586015701293945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,2,128,1,float16,float16,0,0.9119536399841308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,1,128,1,fp8,fp8,0,1.1239791870117188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,1,128,1,fp8,fp8,0,0.023028799891471864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,2,128,1,fp8,fp8,0,0.9566384315490722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,4,128,1,float16,float16,0,0.9663311958312988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,4,128,1,float16,fp8,0,0.9531855583190918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,4,128,1,fp8,fp8,0,0.9842304229736328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,8,128,1,float16,float16,0,1.0298336029052735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,8,128,1,float16,fp8,0,0.955294418334961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,2,128,1,float16,fp8,0,1.07008638381958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,32,128,1,float16,float16,0,0.7793680191040039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,8,128,1,fp8,fp8,0,1.155833625793457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,32,128,1,float16,fp8,0,0.7005599975585938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,32,128,1,fp8,fp8,0,0.7171216011047363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,1,128,1,float16,fp8,0,0.48988637924194334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,1,128,1,fp8,fp8,0,0.49698238372802733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,2,128,1,float16,float16,0,0.4698351860046387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,2,128,1,float16,fp8,0,0.4943888187408447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,2,128,1,fp8,fp8,0,0.48836960792541506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,4,128,1,float16,float16,0,0.49912638664245607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,4,128,1,float16,fp8,0,0.48715839385986326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,8,128,1,fp8,fp8,0,0.024638399481773376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,4,128,1,fp8,fp8,0,0.49320640563964846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,8,128,1,float16,float16,0,0.528278398513794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,8,128,1,float16,fp8,0,0.48993120193481443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,1,128,1,float16,float16,0,0.47484798431396485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,8,128,1,fp8,fp8,0,0.4899951934814453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,32,128,1,float16,fp8,0,0.36330399513244627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,32,128,1,fp8,fp8,0,0.3596031904220581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,1,128,1,float16,float16,0,0.25178558826446534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,1,128,1,float16,fp8,0,0.25886240005493166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,1,128,1,fp8,fp8,0,0.2553344011306763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,2,128,1,float16,float16,0,0.2511136054992676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,2,128,1,float16,fp8,0,0.258622407913208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,2,128,1,fp8,fp8,0,0.25487680435180665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,4,128,1,float16,fp8,0,0.2575808048248291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,4,128,1,float16,float16,0,0.2600287914276123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,4,128,1,fp8,fp8,0,0.25309600830078127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,8,128,1,float16,float16,0,0.277675199508667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,8,128,1,fp8,fp8,0,0.2508960008621216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,8,128,1,float16,fp8,0,0.2546976089477539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,32,128,1,float16,float16,0,0.20552799701690674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,32,128,1,float16,fp8,0,0.1883903980255127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,32,128,1,fp8,fp8,0,0.190339195728302
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,1,128,1,float16,float16,0,0.13173760175704957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,1,128,1,float16,fp8,0,0.13355679512023927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,1,128,1,fp8,fp8,0,0.13557440042495728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,2,128,1,float16,float16,0,0.13169920444488525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,2,128,1,float16,fp8,0,0.13355519771575927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,4,128,1,float16,float16,0,0.1367568016052246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,4,128,1,fp8,fp8,0,0.1344815969467163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,8,128,1,float16,float16,0,0.14719040393829347
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,32,128,1,float16,float16,0,0.39408800601959226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,8,128,1,float16,fp8,0,0.13402559757232665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,8,128,1,fp8,fp8,0,0.13310879468917847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,32,128,1,float16,float16,0,0.10884640216827393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,32,128,1,float16,fp8,0,0.10067839622497558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,32,128,1,fp8,fp8,0,0.10079679489135743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,1,128,1,float16,float16,0,0.07306399941444397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,1,128,1,float16,fp8,0,0.07211999893188477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,1,128,1,fp8,fp8,0,0.07201759815216065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,2,128,1,float16,float16,0,0.0728384017944336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,2,128,1,float16,fp8,0,0.07247040271759034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,2,128,1,fp8,fp8,0,0.07238399982452393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,4,128,1,float16,float16,0,0.07590399980545044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,2,128,1,fp8,fp8,0,0.13551679849624634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,4,128,1,float16,fp8,0,0.07276960015296936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,4,128,1,fp8,fp8,0,0.07423200011253357
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,8,128,1,float16,float16,0,0.08059200048446655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,8,128,1,float16,fp8,0,0.07428799867630005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,8,128,1,fp8,fp8,0,0.07284479737281799
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,32,128,1,float16,float16,0,0.06206880211830139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,32,128,1,float16,fp8,0,0.057094401121139525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,32,128,1,fp8,fp8,0,0.05767359733581543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,1,128,1,float16,float16,0,0.04175040125846863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,1,128,1,float16,fp8,0,0.042603200674057005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,1,128,1,fp8,fp8,0,0.04133760035037994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,2,128,1,float16,float16,0,0.04158880114555359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,2,128,1,float16,fp8,0,0.04144960045814514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,4,128,1,float16,float16,0,0.04321280121803284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,4,128,1,float16,fp8,0,0.04142560064792633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,4,128,1,fp8,fp8,0,0.0412304013967514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,8,128,1,float16,float16,0,0.04545120000839233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,8,128,1,float16,fp8,0,0.04121440052986145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,8,128,1,fp8,fp8,0,0.04132480025291443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,32,128,1,float16,float16,0,0.03442719876766205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,32,128,1,float16,fp8,0,0.033163198828697206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,32,128,1,fp8,fp8,0,0.033083200454711914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,4,128,1,float16,fp8,0,0.13364959955215455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,1,128,1,float16,fp8,0,0.026921600103378296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,1,128,1,float16,float16,0,0.027486398816108704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,1,128,1,fp8,fp8,0,0.026848000288009644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,2,128,1,float16,float16,0,0.027084800601005554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,2,128,1,fp8,fp8,0,0.02688640058040619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,2,128,1,float16,fp8,0,0.027369600534439088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,4,128,1,float16,float16,0,0.02885119915008545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,4,128,1,float16,fp8,0,0.026900801062583923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,8,128,1,float16,float16,0,0.029142400622367857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,4,128,1,fp8,fp8,0,0.027235201001167296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,8,128,1,float16,fp8,0,0.02699199914932251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,8,128,1,fp8,fp8,0,0.026947200298309326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,32,128,1,float16,fp8,0,0.020787200331687926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,32,128,1,fp8,fp8,0,0.02078240066766739
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,1,128,1,float16,float16,0,0.01671680063009262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,1,128,1,float16,fp8,0,0.01866399943828583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,1,128,1,fp8,fp8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,2,128,1,float16,float16,0,0.018454399704933167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,2,128,1,float16,fp8,0,0.01865279972553253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,2,128,1,fp8,fp8,0,0.018648000061511995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,4,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,2,128,1,fp8,fp8,0,0.042310398817062375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,4,128,1,fp8,fp8,0,0.0187376007437706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,8,128,1,float16,float16,0,0.01870879977941513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,8,128,1,fp8,fp8,0,0.018692800402641298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,32,128,1,float16,float16,0,0.01880960017442703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,32,128,1,float16,fp8,0,0.018716800212860107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,32,128,1,fp8,fp8,0,0.018718400597572328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,1,128,1,float16,float16,0,0.016769599914550782
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,1,128,1,float16,fp8,0,0.01690559983253479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,1,128,1,fp8,fp8,0,0.01672479957342148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,2,128,1,float16,float16,0,0.016812799870967864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,2,128,1,float16,fp8,0,0.01666560024023056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,2,128,1,fp8,fp8,0,0.016755199432373045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,4,128,1,float16,float16,0,0.016777600347995757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,4,128,1,float16,fp8,0,0.016667200624942778
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,4,128,1,fp8,fp8,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,8,128,1,float16,float16,0,0.016771200299263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,8,128,1,float16,fp8,0,0.016812799870967864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,32,128,1,float16,float16,0,0.020772799849510193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,8,128,1,fp8,fp8,0,0.016620799899101257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,1,128,1,float16,float16,0,0.383353590965271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,4,128,1,float16,fp8,0,0.01871519982814789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,1,128,1,float16,fp8,0,0.41146078109741213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,8,128,1,float16,fp8,0,0.0186831995844841
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,1,128,1,fp8,fp8,0,0.4111328125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,2,128,1,float16,float16,0,0.3750175952911377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,2,128,1,fp8,fp8,0,0.4071663856506348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,4,128,1,float16,float16,0,0.4003615856170654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,4,128,1,float16,fp8,0,0.4038368225097656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,4,128,1,fp8,fp8,0,0.40392160415649414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,8,128,1,float16,float16,0,0.4403999805450439
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,8,128,1,float16,fp8,0,0.4007823944091797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,8,128,1,fp8,fp8,0,0.40174078941345215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,32,128,1,float16,float16,0,0.34632320404052735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,32,128,1,float16,fp8,0,0.31518239974975587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,32,128,1,fp8,fp8,0,0.3165152072906494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,1,128,1,float16,float16,0,0.1980463981628418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,1,128,1,float16,fp8,0,0.20961759090423585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,1,128,1,fp8,fp8,0,0.2104975938796997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,2,128,1,float16,float16,0,0.19795680046081543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,2,128,1,float16,fp8,0,0.20983359813690186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,2,128,1,fp8,fp8,0,0.20920319557189943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,4,128,1,float16,float16,0,0.2078847885131836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,4,128,1,float16,fp8,0,0.20968959331512452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,4,128,1,fp8,fp8,0,0.20924160480499268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,8,128,1,float16,float16,0,0.22688798904418944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,8,128,1,float16,fp8,0,0.20751359462738037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,8,128,1,fp8,fp8,0,0.20909440517425537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,32,128,1,float16,float16,0,0.17957279682159424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,2,128,1,float16,fp8,0,0.41141600608825685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,32,128,1,float16,fp8,0,0.16600960493087769
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,32,128,1,fp8,fp8,0,0.16472480297088624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,1,128,1,float16,fp8,0,0.11197279691696167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,1,128,1,fp8,fp8,0,0.11091040372848511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,2,128,1,float16,float16,0,0.10696640014648437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,2,128,1,float16,fp8,0,0.1109503984451294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,2,128,1,fp8,fp8,0,0.11188000440597534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,4,128,1,float16,float16,0,0.11074399948120117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,4,128,1,float16,fp8,0,0.11141279935836793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,4,128,1,fp8,fp8,0,0.11095520257949829
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,8,128,1,float16,float16,0,0.12149920463562011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,8,128,1,float16,fp8,0,0.1111456036567688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,8,128,1,fp8,fp8,0,0.1122320055961609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,32,128,1,float16,float16,0,0.09483360052108765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,32,128,1,float16,fp8,0,0.08789920210838317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,32,128,1,fp8,fp8,0,0.08641440272331238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,1,128,1,float16,float16,0,0.05847039818763733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,1,128,1,float16,fp8,0,0.05816320180892944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,1,128,1,fp8,fp8,0,0.059324800968170166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,2,128,1,float16,float16,0,0.05771039724349976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,2,128,1,float16,fp8,0,0.05941600203514099
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,2,128,1,fp8,fp8,0,0.057993602752685544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,4,128,1,float16,float16,0,0.060008001327514646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,4,128,1,float16,fp8,0,0.05916320085525513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,4,128,1,fp8,fp8,0,0.05963199734687805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,8,128,1,float16,fp8,0,0.060001599788665774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,8,128,1,fp8,fp8,0,0.06003519892692566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,32,128,1,float16,float16,0,0.05519840121269226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,1,128,1,float16,float16,0,0.10929919481277466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,32,128,1,float16,fp8,0,0.051551997661590576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,32,128,1,fp8,fp8,0,0.051444798707962036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,1,128,1,float16,float16,0,0.03504959940910339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,1,128,1,fp8,fp8,0,0.0354095995426178
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,2,128,1,float16,float16,0,0.03508000075817108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,2,128,1,float16,fp8,0,0.035473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,2,128,1,fp8,fp8,0,0.035339200496673585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,4,128,1,float16,float16,0,0.035278400778770445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,4,128,1,float16,fp8,0,0.03573279976844788
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,4,128,1,fp8,fp8,0,0.03526880145072937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,8,128,1,float16,fp8,0,0.03535839915275574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,8,128,1,float16,float16,0,0.03868640065193176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,8,128,1,fp8,fp8,0,0.03522399961948395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,32,128,1,float16,float16,0,0.0290336012840271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,32,128,1,float16,fp8,0,0.028940799832344054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,32,128,1,fp8,fp8,0,0.02903839945793152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,1,128,1,float16,float16,0,0.021147200465202333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,1,128,1,float16,fp8,0,0.021891200542449953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,1,128,1,fp8,fp8,0,0.02268960028886795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,8,128,1,float16,float16,0,0.0652895987033844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,2,128,1,float16,float16,0,0.02074880003929138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,2,128,1,float16,fp8,0,0.022681599855422972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,2,128,1,fp8,fp8,0,0.020814399421215057
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,4,128,1,float16,fp8,0,0.022720000147819518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,4,128,1,fp8,fp8,0,0.02274399995803833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,8,128,1,float16,float16,0,0.02284960001707077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,8,128,1,float16,fp8,0,0.022759999334812164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,8,128,1,fp8,fp8,0,0.022745600342750548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,32,128,1,float16,float16,0,0.016708800196647645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,32,128,1,float16,fp8,0,0.018727999925613404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,1,128,1,float16,float16,0,0.014632000029087067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,1,128,1,float16,fp8,0,0.01462559998035431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,1,128,1,float16,fp8,0,0.03592959940433502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,1,128,1,fp8,fp8,0,0.014697599411010741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,2,128,1,float16,float16,0,0.014606399834156037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,2,128,1,float16,fp8,0,0.014672000706195832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,2,128,1,fp8,fp8,0,0.014873600006103516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,4,128,1,float16,float16,0,0.014612799882888794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,4,128,1,float16,fp8,0,0.014990399777889251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,8,128,1,float16,float16,0,0.01658879965543747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,8,128,1,float16,fp8,0,0.014801600575447082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,8,128,1,fp8,fp8,0,0.014891199767589569
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,32,128,1,float16,float16,0,0.015731200575828552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,32,128,1,float16,fp8,0,0.016521599888801575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,32,128,1,fp8,fp8,0,0.01669120043516159
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,1,128,1,float16,float16,0,0.01470080018043518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,1,128,1,float16,fp8,0,0.014606399834156037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,1,128,1,fp8,fp8,0,0.014745600521564484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,2,128,1,float16,float16,0,0.014689600467681885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,2,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,2,128,1,fp8,fp8,0,0.014534400403499603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,4,128,1,float16,float16,0,0.021588799357414246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,4,128,1,float16,float16,0,0.014735999703407287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,4,128,1,float16,fp8,0,0.014561599493026734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,4,128,1,fp8,fp8,0,0.014531199634075165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,8,128,1,float16,float16,0,0.014548799395561219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,8,128,1,float16,fp8,0,0.014555199444293976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,8,128,1,fp8,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,32,128,1,fp8,fp8,0,0.018603199720382692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,32,128,1,float16,fp8,0,0.014510400593280792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,32,128,1,fp8,fp8,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,1,128,1,float16,fp8,0,0.012600000202655792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,1,128,1,fp8,fp8,0,0.012563200294971466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,2,128,1,float16,float16,0,0.01361600011587143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,2,128,1,float16,fp8,0,0.01268479973077774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,4,128,1,fp8,fp8,0,0.014678399264812469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,2,128,1,fp8,fp8,0,0.01443839967250824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,4,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,4,128,1,float16,fp8,0,0.014444799721240997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,4,128,1,fp8,fp8,0,0.014443199336528777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,8,128,1,float16,float16,0,0.014555199444293976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,8,128,1,float16,fp8,0,0.014440000057220459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,8,128,1,fp8,fp8,0,0.01465280055999756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,1,128,1,float16,float16,0,0.2362544059753418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,1,128,1,float16,fp8,0,0.2520143985748291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,32,128,1,float16,float16,0,0.014668799936771393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,1,128,1,fp8,fp8,0,0.2519344091415405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,1,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,2,128,1,float16,float16,0,0.23545920848846436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,2,128,1,float16,fp8,0,0.25146400928497314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,4,128,1,float16,float16,0,0.2449647903442383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,2,128,1,fp8,fp8,0,0.25005919933319093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,4,128,1,float16,fp8,0,0.2514336109161377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,4,128,1,fp8,fp8,0,0.25001599788665774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,8,128,1,float16,float16,0,0.26168479919433596
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,8,128,1,float16,fp8,0,0.25016160011291505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,32,128,1,float16,float16,0,0.1923359990119934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,8,128,1,fp8,fp8,0,0.24842400550842286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,32,128,1,float16,fp8,0,0.1848464012145996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,1,128,1,float16,float16,0,0.12521920204162598
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,1,128,1,float16,fp8,0,0.13149919509887695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,2,128,1,float16,float16,0,0.12445919513702393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,2,128,1,float16,fp8,0,0.13026080131530762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,2,128,1,fp8,fp8,0,0.12994879484176636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,4,128,1,float16,float16,0,0.12859200239181517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,4,128,1,float16,fp8,0,0.12965760231018067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,4,128,1,fp8,fp8,0,0.12967840433120728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,8,128,1,float16,float16,0,0.13804479837417602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,8,128,1,float16,fp8,0,0.12966879606246948
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,32,128,1,float16,float16,0,0.1012495994567871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,32,128,1,float16,fp8,0,0.09741119742393493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,32,128,1,fp8,fp8,0,0.09802719950675964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,1,128,1,float16,float16,0,0.0679967999458313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,1,128,1,float16,fp8,0,0.07002559900283814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,32,128,1,fp8,fp8,0,0.18272320032119752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,2,128,1,float16,float16,0,0.06797119975090027
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,1,128,1,fp8,fp8,0,0.13021600246429443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,2,128,1,float16,fp8,0,0.06997119784355163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,2,128,1,fp8,fp8,0,0.07024480104446411
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,4,128,1,float16,float16,0,0.07004960179328919
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,4,128,1,float16,fp8,0,0.07006080150604248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,4,128,1,fp8,fp8,0,0.06987360119819641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,8,128,1,float16,float16,0,0.07554559707641602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,8,128,1,fp8,fp8,0,0.07015200257301331
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,32,128,1,float16,float16,0,0.055537599325180056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,32,128,1,float16,fp8,0,0.05341920256614685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,32,128,1,fp8,fp8,0,0.053518402576446536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,1,128,1,float16,float16,0,0.03717440068721771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,1,128,1,float16,fp8,0,0.03899039924144745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,1,128,1,fp8,fp8,0,0.03901279866695404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,8,128,1,fp8,fp8,0,0.1293280005455017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,2,128,1,float16,float16,0,0.03723039925098419
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,2,128,1,float16,fp8,0,0.039110401272773744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,2,128,1,fp8,fp8,0,0.03925600051879883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,4,128,1,float16,float16,0,0.03749600052833557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,4,128,1,float16,fp8,0,0.039110401272773744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,4,128,1,fp8,fp8,0,0.03869760036468506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,8,128,1,float16,float16,0,0.041659200191497804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,8,128,1,float16,fp8,0,0.038431999087333676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,8,128,1,fp8,fp8,0,0.03920640051364899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,32,128,1,float16,float16,0,0.030943998694419862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,32,128,1,float16,fp8,0,0.032148799300193785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,32,128,1,fp8,fp8,0,0.03094879984855652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,1,128,1,float16,float16,0,0.02391040027141571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,1,128,1,float16,fp8,0,0.0247871994972229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,1,128,1,fp8,fp8,0,0.0250575989484787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,2,128,1,float16,float16,0,0.022729599475860597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,2,128,1,float16,fp8,0,0.024849599599838255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,2,128,1,fp8,fp8,0,0.02484000027179718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,4,128,1,float16,float16,0,0.024851199984550477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,4,128,1,float16,fp8,0,0.02489439994096756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,8,128,1,float16,float16,0,0.02481119930744171
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,8,128,1,float16,fp8,0,0.024748800694942473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,8,128,1,fp8,fp8,0,0.024905599653720856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,32,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,32,128,1,float16,fp8,0,0.018665599822998046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,32,128,1,fp8,fp8,0,0.018662400543689728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,1,128,1,float16,float16,0,0.014553600549697876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,1,128,1,float16,fp8,0,0.016582399606704712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,1,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,2,128,1,float16,float16,0,0.01462559998035431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,2,128,1,float16,fp8,0,0.01661760061979294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,2,128,1,fp8,fp8,0,0.01656000018119812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,4,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,4,128,1,float16,fp8,0,0.016648000478744505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,1,128,1,fp8,fp8,0,0.0699504017829895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,8,128,1,float16,float16,0,0.016732800006866454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,8,128,1,float16,fp8,0,0.016630400717258454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,8,128,1,fp8,fp8,0,0.016655999422073364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,32,128,1,float16,float16,0,0.014280000329017639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,4,128,1,fp8,fp8,0,0.024825599789619446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,32,128,1,float16,fp8,0,0.01464959979057312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,32,128,1,fp8,fp8,0,0.014619199931621552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,1,128,1,float16,float16,0,0.01242400035262108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,1,128,1,float16,fp8,0,0.012511999905109405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,1,128,1,fp8,fp8,0,0.01234399974346161
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,2,128,1,float16,float16,0,0.012457600235939026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,2,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,2,128,1,fp8,fp8,0,0.012331199645996094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,4,128,1,float16,float16,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,4,128,1,float16,fp8,0,0.01247519999742508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,4,128,1,fp8,fp8,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,8,128,1,float16,float16,0,0.012598399817943574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,8,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,8,128,1,fp8,fp8,0,0.012414400279521943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,32,128,1,float16,float16,0,0.012468799948692322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,32,128,1,float16,fp8,0,0.012467200309038163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,32,128,1,fp8,fp8,0,0.012388800084590913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,1,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,1,128,1,float16,fp8,0,0.010673599690198899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,1,128,1,fp8,fp8,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,2,128,1,float16,float16,0,0.010623999685049058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,2,128,1,float16,fp8,0,0.012345600128173827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,2,128,1,fp8,fp8,0,0.012409599870443344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,4,128,1,float16,float16,0,0.012372799962759019
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,8,128,1,float16,fp8,0,0.06984320282936096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,4,128,1,fp8,fp8,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,8,128,1,float16,float16,0,0.010648000240325927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,8,128,1,float16,fp8,0,0.010892800241708755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,8,128,1,fp8,fp8,0,0.011004800349473954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,32,128,1,float16,float16,0,0.012439999729394913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,32,128,1,float16,fp8,0,0.011726400256156922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,32,128,1,fp8,fp8,0,0.011563199758529662
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,1,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,1,128,1,float16,fp8,0,0.010593599826097488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,1,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,2,128,1,float16,float16,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,2,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,2,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,4,128,1,float16,float16,0,0.010684800148010255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,4,128,1,float16,fp8,0,0.010659199953079224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,4,128,1,fp8,fp8,0,0.01647839993238449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,4,128,1,fp8,fp8,0,0.010596799850463866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,8,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,8,128,1,float16,fp8,0,0.010751999914646149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,8,128,1,fp8,fp8,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,1,128,1,float16,fp8,0,0.20107679367065429
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,1,128,1,float16,float16,0,0.19648159742355348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,4,128,1,float16,fp8,0,0.012425599992275238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,1,128,1,fp8,fp8,0,0.20144639015197754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,2,128,1,float16,float16,0,0.1965343952178955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,2,128,1,float16,fp8,0,0.19961919784545898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,2,128,1,fp8,fp8,0,0.2013711929321289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,4,128,1,float16,fp8,0,0.19924960136413575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,4,128,1,fp8,fp8,0,0.19904799461364747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,8,128,1,float16,float16,0,0.21201438903808595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,8,128,1,float16,fp8,0,0.19882240295410156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,8,128,1,fp8,fp8,0,0.19856799840927125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,32,128,1,float16,float16,0,0.13709280490875245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,32,128,1,float16,fp8,0,0.1308511972427368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,32,128,1,fp8,fp8,0,0.13143199682235718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,1,128,1,float16,fp8,0,0.10478240251541138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,2,128,1,float16,float16,0,0.10511360168457032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,2,128,1,float16,fp8,0,0.10449119806289672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,2,128,1,fp8,fp8,0,0.10448000431060792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,4,128,1,float16,float16,0,0.10682879686355591
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,4,128,1,float16,fp8,0,0.1047935962677002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,4,128,1,fp8,fp8,0,0.10476959943771362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,8,128,1,float16,float16,0,0.11105120182037354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,8,128,1,float16,fp8,0,0.10442719459533692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,8,128,1,fp8,fp8,0,0.10320960283279419
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,32,128,1,float16,float16,0,0.07211840152740479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,32,128,1,float16,fp8,0,0.0699184000492096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,4,128,1,float16,float16,0,0.2033247947692871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,32,128,1,fp8,fp8,0,0.06991360187530518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,1,128,1,float16,fp8,0,0.05551999807357788
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,1,128,1,fp8,fp8,0,0.05551519989967346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,2,128,1,float16,float16,0,0.05548480153083801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,1,128,1,float16,float16,0,0.10508639812469482
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,2,128,1,float16,fp8,0,0.05557760000228882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,2,128,1,fp8,fp8,0,0.056304001808166505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,4,128,1,float16,float16,0,0.05681279897689819
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,1,128,1,fp8,fp8,0,0.10431679487228393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,4,128,1,float16,fp8,0,0.056225597858428955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,4,128,1,fp8,fp8,0,0.05582399964332581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,8,128,1,float16,float16,0,0.06047999858856201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,8,128,1,fp8,fp8,0,0.055638402700424194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,32,128,1,float16,float16,0,0.03932160139083862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,32,128,1,float16,fp8,0,0.03868800103664398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,32,128,1,fp8,fp8,0,0.03914720118045807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,1,128,1,float16,fp8,0,0.03140639960765838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,1,128,1,fp8,fp8,0,0.03128319978713989
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,2,128,1,float16,float16,0,0.032948800921440126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,2,128,1,float16,fp8,0,0.031251201033592226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,2,128,1,fp8,fp8,0,0.03199360072612763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,4,128,1,float16,float16,0,0.03300639986991882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,4,128,1,float16,fp8,0,0.031641599535942075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,4,128,1,fp8,fp8,0,0.03155840039253235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,1,128,1,float16,float16,0,0.05592479705810547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,8,128,1,float16,float16,0,0.033228799700737
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,8,128,1,float16,fp8,0,0.03303839862346649
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,8,128,1,fp8,fp8,0,0.031064000725746155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,32,128,1,float16,float16,0,0.024408000707626342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,32,128,1,float16,fp8,0,0.024873599410057068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,32,128,1,fp8,fp8,0,0.02512640058994293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,1,128,1,float16,float16,0,0.02077440023422241
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,1,128,1,float16,fp8,0,0.020803199708461763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,8,128,1,float16,fp8,0,0.05606080293655395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,2,128,1,float16,float16,0,0.02081120014190674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,2,128,1,float16,fp8,0,0.020764799416065217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,1,128,1,float16,float16,0,0.03128960132598877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,2,128,1,fp8,fp8,0,0.020764799416065217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,4,128,1,float16,fp8,0,0.020820799469947814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,4,128,1,fp8,fp8,0,0.020755200088024138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,8,128,1,float16,float16,0,0.022628800570964815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,8,128,1,float16,fp8,0,0.020763200521469117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,8,128,1,fp8,fp8,0,0.020904000103473663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,32,128,1,float16,float16,0,0.016515199840068818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,32,128,1,float16,fp8,0,0.01661760061979294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,32,128,1,fp8,fp8,0,0.01658399999141693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,1,128,1,float16,float16,0,0.014579200744628906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,1,128,1,float16,fp8,0,0.014604799449443817
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,1,128,1,fp8,fp8,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,2,128,1,float16,float16,0,0.01465120017528534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,2,128,1,float16,fp8,0,0.014636799693107605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,2,128,1,fp8,fp8,0,0.014633600413799287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,4,128,1,float16,float16,0,0.014587199687957764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,4,128,1,float16,fp8,0,0.014654399454593658
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,4,128,1,fp8,fp8,0,0.014664000272750855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,8,128,1,float16,float16,0,0.014670400321483612
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,8,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,1,128,1,fp8,fp8,0,0.020820799469947814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,8,128,1,fp8,fp8,0,0.014628799259662628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,32,128,1,float16,float16,0,0.012561599910259246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,32,128,1,float16,fp8,0,0.012388800084590913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,32,128,1,fp8,fp8,0,0.012595200538635254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,1,128,1,float16,float16,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,4,128,1,float16,float16,0,0.02070239931344986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,1,128,1,float16,fp8,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,1,128,1,fp8,fp8,0,0.010596799850463866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,2,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,2,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,4,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,4,128,1,float16,fp8,0,0.010598400235176086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,4,128,1,fp8,fp8,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,8,128,1,float16,float16,0,0.011145599931478501
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,8,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,8,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,32,128,1,float16,float16,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,32,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,1,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,1,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,2,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,2,128,1,float16,float16,0,0.010868799686431885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,2,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,4,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,4,128,1,float16,float16,0,0.011622399836778641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,4,128,1,fp8,fp8,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,8,128,1,float16,float16,0,0.010926400125026704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,8,128,1,fp8,fp8,0,0.01058719977736473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,32,128,1,float16,float16,0,0.011068800091743469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,32,128,1,float16,fp8,0,0.010633599758148194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,32,128,1,fp8,fp8,0,0.010915199667215348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,1,128,1,float16,float16,0,0.010654400289058685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,2,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,1,128,1,float16,fp8,0,0.010859200358390808
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,1,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,2,128,1,float16,float16,0,0.010675200074911118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,2,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,2,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,32,128,1,float16,fp8,0,0.0123648002743721
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,4,128,1,float16,fp8,0,0.010940799862146378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,4,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,8,128,1,float16,float16,0,0.010585600137710571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,8,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,8,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,1,128,1,float16,float16,0,0.1753440022468567
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,1,128,1,float16,fp8,0,0.17439199686050416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,1,128,1,fp8,fp8,0,0.17475999593734742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,2,128,1,float16,float16,0,0.1764896035194397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,2,128,1,float16,fp8,0,0.1747056007385254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,2,128,1,fp8,fp8,0,0.17416800260543824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,4,128,1,float16,float16,0,0.1787536025047302
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,8,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,4,128,1,float16,fp8,0,0.17366880178451538
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,4,128,1,fp8,fp8,0,0.1745360016822815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,8,128,1,float16,float16,0,0.17984639406204223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,8,128,1,fp8,fp8,0,0.17474240064620972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,32,128,1,float16,float16,0,0.10709439516067505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,32,128,1,float16,fp8,0,0.10481760501861573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,32,128,1,fp8,fp8,0,0.10465439558029174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,1,128,1,float16,float16,0,0.09127839803695678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,1,128,1,float16,fp8,0,0.09036480188369751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,1,128,1,fp8,fp8,0,0.09047520160675049
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,2,128,1,float16,float16,0,0.09076319932937622
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,2,128,1,float16,fp8,0,0.09048159718513489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,2,128,1,fp8,fp8,0,0.0904640018939972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,4,128,1,float16,float16,0,0.09356319904327393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,4,128,1,float16,fp8,0,0.0905023992061615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,4,128,1,fp8,fp8,0,0.09039520025253296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,8,128,1,float16,float16,0,0.09504799842834473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,8,128,1,fp8,fp8,0,0.09056640267372132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,32,128,1,float16,float16,0,0.05962560176849365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,32,128,1,float16,fp8,0,0.055550402402877806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,8,128,1,float16,fp8,0,0.17357439994812013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,32,128,1,fp8,fp8,0,0.05698080062866211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,1,128,1,float16,float16,0,0.05160319805145264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,1,128,1,float16,fp8,0,0.049769601225852965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,1,128,1,fp8,fp8,0,0.049649599194526675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,2,128,1,float16,float16,0,0.05168480277061462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,8,128,1,float16,fp8,0,0.09040319919586182
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,2,128,1,float16,fp8,0,0.049614399671554565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,2,128,1,fp8,fp8,0,0.04969759881496429
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,4,128,1,float16,float16,0,0.05168319940567016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,4,128,1,float16,fp8,0,0.04955520033836365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,4,128,1,fp8,fp8,0,0.049399998784065244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,8,128,1,float16,float16,0,0.05206400156021118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,8,128,1,fp8,fp8,0,0.04951840043067932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,32,128,1,float16,float16,0,0.03306879997253418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,32,128,1,float16,fp8,0,0.0330159991979599
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,32,128,1,fp8,fp8,0,0.03301439881324768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,1,128,1,float16,float16,0,0.03079040050506592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,1,128,1,float16,fp8,0,0.029003199934959412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,1,128,1,fp8,fp8,0,0.02911199927330017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,2,128,1,float16,float16,0,0.03059520125389099
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,4,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,2,128,1,fp8,fp8,0,0.02900159955024719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,4,128,1,float16,fp8,0,0.028951999545097352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,4,128,1,fp8,fp8,0,0.02890399992465973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,8,128,1,float16,float16,0,0.03097119927406311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,8,128,1,float16,fp8,0,0.028977599740028382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,8,128,1,fp8,fp8,0,0.028964799642562867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,8,128,1,float16,fp8,0,0.04962559938430786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,32,128,1,float16,float16,0,0.0207056000828743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,32,128,1,float16,fp8,0,0.020819200575351714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,32,128,1,fp8,fp8,0,0.020828799903392793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,1,128,1,float16,float16,0,0.020343999564647674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,1,128,1,float16,fp8,0,0.018751999735832213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,1,128,1,fp8,fp8,0,0.01892800033092499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,2,128,1,float16,float16,0,0.018910400569438934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,2,128,1,float16,fp8,0,0.01903039962053299
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,2,128,1,fp8,fp8,0,0.018881599605083465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,4,128,1,float16,float16,0,0.020665599405765532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,4,128,1,float16,fp8,0,0.01878879964351654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,4,128,1,fp8,fp8,0,0.01873439997434616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,8,128,1,float16,float16,0,0.020535999536514284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,8,128,1,float16,fp8,0,0.02059199959039688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,8,128,1,fp8,fp8,0,0.019006399810314177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,32,128,1,float16,float16,0,0.014868800342082978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,32,128,1,float16,fp8,0,0.014591999351978302
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,32,128,1,fp8,fp8,0,0.014772799611091614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,1,128,1,float16,float16,0,0.014612799882888794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,1,128,1,float16,fp8,0,0.014628799259662628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,1,128,1,fp8,fp8,0,0.014455999433994293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,2,128,1,float16,float16,0,0.01465280055999756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,2,128,1,float16,fp8,0,0.01448799967765808
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,2,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,4,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,4,128,1,float16,fp8,0,0.014499199390411378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,4,128,1,fp8,fp8,0,0.01446239948272705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,8,128,1,float16,float16,0,0.014660799503326416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,8,128,1,float16,fp8,0,0.014528000354766845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,32,128,1,float16,float16,0,0.011587200313806533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,8,128,1,fp8,fp8,0,0.014641599357128143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,32,128,1,float16,fp8,0,0.010790400207042694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,32,128,1,fp8,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,1,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,1,128,1,float16,fp8,0,0.010787200182676315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,1,128,1,fp8,fp8,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,2,128,1,float16,float16,0,0.010809600353240967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,2,128,1,float16,fp8,0,0.010649599879980088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,2,128,1,fp8,fp8,0,0.010793600231409073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,4,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,4,128,1,float16,fp8,0,0.010787200182676315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,8,128,1,float16,float16,0,0.01082720011472702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,8,128,1,float16,fp8,0,0.010713600367307664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,8,128,1,fp8,fp8,0,0.010595200210809707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,32,128,1,float16,float16,0,0.011054400354623795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,32,128,1,float16,fp8,0,0.010657600313425063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,32,128,1,fp8,fp8,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,1,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,1,128,1,float16,fp8,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,1,128,1,fp8,fp8,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,2,128,1,float16,float16,0,0.010718400031328202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,2,128,1,float16,fp8,0,0.010583999752998351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,2,128,1,fp8,fp8,0,0.01072480008006096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,4,128,1,float16,float16,0,0.010644800215959548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,4,128,1,float16,fp8,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,4,128,1,fp8,fp8,0,0.010654400289058685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,8,128,1,float16,float16,0,0.010648000240325927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,4,128,1,float16,float16,0,0.029531198740005492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,8,128,1,float16,fp8,0,0.01061440035700798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,8,128,1,fp8,fp8,0,0.01058719977736473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,32,128,1,float16,float16,0,0.010684800148010255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,32,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,32,128,1,fp8,fp8,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,1,128,1,float16,fp8,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,4,128,1,fp8,fp8,0,0.0106175996363163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,1,128,1,fp8,fp8,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,2,128,1,float16,float16,0,0.010849600285291671
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,2,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,2,128,1,fp8,fp8,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,4,128,1,float16,float16,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,4,128,1,float16,fp8,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,4,128,1,fp8,fp8,0,0.010609599947929382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,8,128,1,float16,float16,0,0.010606399923563003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,8,128,1,float16,fp8,0,0.010648000240325927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,8,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,1,128,1,float16,float16,0,0.16784640550613403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,1,128,1,float16,fp8,0,0.1604032039642334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,2,128,1,float16,fp8,0,0.029091200232505797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,1,128,1,fp8,fp8,0,0.1603008031845093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,2,128,1,float16,fp8,0,0.1604431986808777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,1,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,2,128,1,fp8,fp8,0,0.16039999723434448
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,4,128,1,float16,float16,0,0.1682479977607727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,4,128,1,float16,fp8,0,0.16039520502090454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,4,128,1,fp8,fp8,0,0.15990560054779052
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,8,128,1,float16,float16,0,0.17345919609069824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,8,128,1,float16,fp8,0,0.1602031946182251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,32,128,1,float16,float16,0,0.09842399954795837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,8,128,1,fp8,fp8,0,0.16044800281524657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,32,128,1,float16,fp8,0,0.09042400121688843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,32,128,1,fp8,fp8,0,0.09124159812927246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,1,128,1,float16,float16,0,0.08861759901046753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,1,128,1,fp8,fp8,0,0.08404800295829773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,1,128,1,float16,fp8,0,0.08444479703903199
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,2,128,1,float16,float16,0,0.08857120275497436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,2,128,1,float16,fp8,0,0.08444640040397644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,2,128,1,fp8,fp8,0,0.08426880240440368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,4,128,1,float16,fp8,0,0.08424320220947265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,4,128,1,float16,float16,0,0.09076640009880066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,4,128,1,fp8,fp8,0,0.08425440192222595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,2,128,1,float16,float16,0,0.16858240365982055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,8,128,1,float16,fp8,0,0.084334397315979
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,8,128,1,float16,float16,0,0.09195520281791687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,8,128,1,fp8,fp8,0,0.08436800241470337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,32,128,1,float16,float16,0,0.05161280035972595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,32,128,1,float16,fp8,0,0.05128480195999145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,32,128,1,fp8,fp8,0,0.0514303982257843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,1,128,1,fp8,fp8,0,0.04743039906024933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,1,128,1,float16,fp8,0,0.04743840098381043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,2,128,1,float16,float16,0,0.04962719976902008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,2,128,1,fp8,fp8,0,0.04736959934234619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,4,128,1,float16,float16,0,0.04951040148735046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,4,128,1,fp8,fp8,0,0.04738079905509949
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,8,128,1,float16,float16,0,0.04967199862003326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,8,128,1,float16,fp8,0,0.047391998767852786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,8,128,1,fp8,fp8,0,0.047337600588798524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,32,128,1,float16,float16,0,0.031038400530815125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,32,128,1,float16,fp8,0,0.029057601094245912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,32,128,1,fp8,fp8,0,0.029091200232505797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,1,128,1,float16,float16,0,0.029129600524902342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,1,128,1,float16,fp8,0,0.028543999791145323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,1,128,1,fp8,fp8,0,0.026939201354980468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,2,128,1,float16,float16,0,0.028993600606918336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,2,128,1,float16,fp8,0,0.026940798759460448
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,2,128,1,fp8,fp8,0,0.02744640111923218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,4,128,1,float16,float16,0,0.029267200827598573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,4,128,1,float16,fp8,0,0.02884320020675659
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,1,128,1,float16,float16,0,0.049446401000022885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,4,128,1,fp8,fp8,0,0.02861759960651398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,8,128,1,float16,float16,0,0.02980000078678131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,8,128,1,float16,fp8,0,0.028814399242401124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,8,128,1,fp8,fp8,0,0.02712000012397766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,32,128,1,float16,float16,0,0.02080959975719452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,2,128,1,float16,fp8,0,0.0474016010761261
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,32,128,1,fp8,fp8,0,0.020582400262355804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,1,128,1,float16,float16,0,0.018780800700187682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,4,128,1,float16,fp8,0,0.04748159945011139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,1,128,1,float16,fp8,0,0.018769599497318268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,1,128,1,fp8,fp8,0,0.018699200451374055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,2,128,1,float16,fp8,0,0.01879040002822876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,4,128,1,float16,float16,0,0.018785600364208222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,4,128,1,float16,fp8,0,0.018739199638366698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,4,128,1,fp8,fp8,0,0.01879040002822876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,8,128,1,float16,float16,0,0.019088000059127808
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,8,128,1,float16,fp8,0,0.018619200587272643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,8,128,1,fp8,fp8,0,0.018719999492168425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,32,128,1,float16,float16,0,0.014628799259662628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,32,128,1,float16,fp8,0,0.01440960019826889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,32,128,1,fp8,fp8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,1,128,1,float16,float16,0,0.014574399590492249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,1,128,1,float16,fp8,0,0.01449279934167862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,1,128,1,fp8,fp8,0,0.01449120044708252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,2,128,1,float16,float16,0,0.014472000300884247
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,2,128,1,float16,fp8,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,2,128,1,fp8,fp8,0,0.01451359987258911
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,4,128,1,float16,float16,0,0.014484800398349762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,4,128,1,float16,fp8,0,0.013918399810791016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,4,128,1,fp8,fp8,0,0.014267200231552124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,8,128,1,float16,float16,0,0.01446560025215149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,8,128,1,float16,fp8,0,0.013510400056838989
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,8,128,1,fp8,fp8,0,0.014457599818706512
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,2,128,1,float16,float16,0,0.018988800048828126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,32,128,1,float16,float16,0,0.012537600100040435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,2,128,1,fp8,fp8,0,0.018721599876880646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,32,128,1,fp8,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,1,128,1,float16,float16,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,1,128,1,float16,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,1,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,2,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,2,128,1,float16,fp8,0,0.01071999967098236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,2,128,1,fp8,fp8,0,0.010644800215959548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,4,128,1,float16,float16,0,0.01072160005569458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,4,128,1,float16,fp8,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,4,128,1,fp8,fp8,0,0.01114400029182434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,8,128,1,float16,float16,0,0.010710400342941285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,8,128,1,float16,fp8,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,8,128,1,fp8,fp8,0,0.01064319983124733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,32,128,1,float16,float16,0,0.010828799754381179
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,32,128,1,float16,fp8,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,32,128,1,fp8,fp8,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,1,128,1,float16,float16,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,1,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,1,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,2,128,1,float16,float16,0,0.010552000254392624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,2,128,1,float16,fp8,0,0.010527999699115753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,2,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,32,128,1,float16,fp8,0,0.02022880017757416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,4,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,4,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,8,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,8,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,8,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,32,128,1,float16,fp8,0,0.010644800215959548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,32,128,1,float16,float16,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,32,128,1,float16,fp8,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,32,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,1,128,1,float16,float16,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,1,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,1,128,1,fp8,fp8,0,0.010326399654150008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,2,128,1,float16,float16,0,0.010527999699115753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,2,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,2,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,4,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,4,128,1,float16,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,4,128,1,fp8,fp8,0,0.010527999699115753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,8,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,8,128,1,float16,fp8,0,0.010678400099277497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,8,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,1,128,1,float16,fp8,0,0.1530511975288391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,1,128,1,fp8,fp8,0,0.15303679704666137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,0,0.16459039449691773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,0,0.15205440521240235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,2,128,1,fp8,fp8,0,0.15233759880065917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,4,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,0,0.1531407952308655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,0,0.16431200504302979
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,4,128,1,fp8,fp8,0,0.15451840162277222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,0,0.15309280157089233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,8,128,1,fp8,fp8,0,0.15433919429779053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,0,0.0885200023651123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,32,128,1,fp8,fp8,0,0.0820688009262085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,0,0.08258399963378907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,1,128,1,float16,float16,0,0.08677440285682678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,1,128,1,float16,fp8,0,0.08248479962348938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,1,128,1,float16,float16,0,0.16606240272521972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,0,0.08886880278587342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,0,0.08223199844360352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,0,0.0879151999950409
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,2,128,1,fp8,fp8,0,0.08265439867973327
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,0,0.08222079873085023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,4,128,1,fp8,fp8,0,0.08274400234222412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,0,0.08831520080566406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,8,128,1,fp8,fp8,0,0.08221279978752136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,0,0.08270080089569092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,0,0.05016800165176392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,0,0.046387198567390445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,32,128,1,fp8,fp8,0,0.0453247994184494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,1,128,1,float16,float16,0,0.04979679882526398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,1,128,1,float16,fp8,0,0.04547199904918671
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,0,0.04949919879436493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,0,0.1643407940864563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,1,128,1,fp8,fp8,0,0.045603200793266296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,0,0.04553439915180206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,2,128,1,fp8,fp8,0,0.045500800013542175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,0,0.04975680112838745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,4,128,1,fp8,fp8,0,0.04545280039310455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,0,0.049667200446128844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,0,0.04555999934673309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,8,128,1,fp8,fp8,0,0.045684799551963806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,0,0.029161599278450013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,0,0.027153599262237548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,32,128,1,fp8,fp8,0,0.027118399739265442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,1,128,1,float16,float16,0,0.029110398888587952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,1,128,1,float16,fp8,0,0.02696160078048706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,1,128,1,fp8,fp8,0,0.08064479827880859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,0,0.027001601457595826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,2,128,1,fp8,fp8,0,0.026867198944091796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,0,0.02905279994010925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,0,0.026915198564529418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,4,128,1,fp8,fp8,0,0.027004799246788024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,0,0.028907200694084166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,0,0.026958400011062623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,8,128,1,fp8,fp8,0,0.026913601160049438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,0,0.020694400370121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,0,0.018691200017929076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,32,128,1,fp8,fp8,0,0.01857440024614334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,1,128,1,float16,float16,0,0.018671999871730804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,1,128,1,float16,fp8,0,0.018628799915313722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,1,128,1,fp8,fp8,0,0.01874080002307892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,0,0.01880960017442703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,0,0.0456063985824585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,0,0.018611200153827667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,2,128,1,fp8,fp8,0,0.018699200451374055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,0,0.018755200505256652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,0,0.01860000044107437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,4,128,1,fp8,fp8,0,0.018598400056362152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,0,0.018753600120544434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,1,128,1,fp8,fp8,0,0.027046400308609008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,8,128,1,fp8,fp8,0,0.018588800728321076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,0,0.02901119887828827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,0,0.014748799800872802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,0,0.012604799866676331
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,32,128,1,fp8,fp8,0,0.014132800698280334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,1,128,1,float16,float16,0,0.014632000029087067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,1,128,1,float16,fp8,0,0.013892799615859985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,1,128,1,fp8,fp8,0,0.013206399977207184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,0,0.014769600331783294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,0,0.012603199481964112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,2,128,1,fp8,fp8,0,0.014567999541759491
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,0,0.014569599926471711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,4,128,1,fp8,fp8,0,0.014470399916172027
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,0,0.014524799585342408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,0,0.012545600533485413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,0,0.012515200674533844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,32,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,1,128,1,float16,float16,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,1,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,1,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,0,0.012513600289821625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,2,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,4,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,0,0.018646399676799773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,0,0.010862399637699128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,0,0.010681600123643876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,8,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,32,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,1,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,1,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,0,0.010639999806880952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,8,128,1,fp8,fp8,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,4,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,8,128,1,fp8,fp8,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,32,128,1,fp8,fp8,0,0.010304000228643417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,1,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,1,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,4,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,0,0.010353600233793258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,8,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,2,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,1,128,1,float16,float16,0,0.010606399923563003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,fp8,0,8.876107025146485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,1,128,1,fp8,fp8,0,8.892527770996093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,float16,0,13.477140808105469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,float16,0,13.419291687011718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,fp8,0,9.084127807617188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,2,128,1,fp8,fp8,0,9.2583740234375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,fp8,0,8.514571380615234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,float16,0,14.702217102050781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,4,128,1,fp8,fp8,0,10.028241729736328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,fp8,0,9.205155181884766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,float16,0,16.8679443359375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,8,128,1,fp8,fp8,0,9.185652923583984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,fp8,0,4.275523376464844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,float16,0,6.699212646484375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,24,128,1,fp8,fp8,0,4.387031936645508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,fp8,0,4.079774475097656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,1,128,1,fp8,fp8,0,4.129560089111328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,fp8,0,4.1423393249511715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,float16,0,6.936335754394531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,2,128,1,fp8,fp8,0,4.411614227294922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,fp8,0,4.529694366455078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,float16,0,6.723310089111328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,4,128,1,fp8,fp8,0,4.46123046875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,fp8,0,4.454800033569336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,8,128,1,fp8,fp8,0,4.558011245727539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,float16,0,7.7479103088378904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,fp8,0,2.154961585998535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,24,128,1,fp8,fp8,0,2.867527961730957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,float16,0,3.0550928115844727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,fp8,0,2.077427291870117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,1,128,1,fp8,fp8,0,2.1904272079467773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,float16,0,3.036065673828125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,fp8,0,2.224046325683594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,2,128,1,fp8,fp8,0,2.1283632278442384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,float16,0,2.3828224182128905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,fp8,0,2.121075248718262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,4,128,1,fp8,fp8,0,2.223929595947266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,float16,0,2.7579599380493165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,fp8,0,2.0786415100097657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,8,128,1,fp8,fp8,0,2.325424003601074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,fp8,0,1.1675040245056152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,24,128,1,fp8,fp8,0,1.1341919898986816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,float16,0,1.2606191635131836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,fp8,0,1.5358896255493164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,1,128,1,fp8,fp8,0,1.1015567779541016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,float16,0,1.2565168380737304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,fp8,0,1.1719167709350586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,2,128,1,fp8,fp8,0,1.1214112281799316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,float16,0,1.2731264114379883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,fp8,0,1.2928768157958985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,4,128,1,fp8,fp8,0,1.2551136016845703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,float16,0,1.26670560836792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,fp8,0,1.1254768371582031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,8,128,1,fp8,fp8,0,1.1019455909729003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,float16,0,7.673062133789062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,fp8,0,4.830121612548828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,1,128,1,fp8,fp8,0,4.996425628662109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,float16,0,8.21579360961914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,fp8,0,4.865273666381836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,2,128,1,fp8,fp8,0,5.088224029541015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,float16,0,8.316683197021485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,fp8,0,5.025281524658203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,4,128,1,fp8,fp8,0,5.069046401977539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,float16,0,8.148417663574218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,fp8,0,4.967689514160156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,8,128,1,fp8,fp8,0,5.093467330932617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,float16,0,1.3019328117370605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,float16,0,2.743569564819336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,fp8,0,2.6859664916992188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,float16,0,6.599400329589844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,24,128,1,fp8,fp8,0,2.467428779602051
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,float16,0,3.2094352722167967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,float16,0,2.912118339538574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,fp8,0,2.5506223678588866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,1,128,1,fp8,fp8,0,2.32534236907959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,2,128,1,fp8,fp8,0,2.440760040283203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,fp8,0,2.6525535583496094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,float16,0,3.830806350708008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,float16,0,3.4463966369628904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,fp8,0,2.423646354675293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,4,128,1,fp8,fp8,0,2.415086364746094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,fp8,0,2.3607664108276367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,float16,0,3.731439971923828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,float16,0,1.6783279418945312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,8,128,1,fp8,fp8,0,2.4675567626953123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,fp8,0,1.319923210144043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,24,128,1,fp8,fp8,0,1.5043647766113282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,fp8,0,1.2850704193115234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,1,128,1,fp8,fp8,0,1.2245295524597168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,float16,0,2.084552001953125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,float16,0,1.4852992057800294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,fp8,0,1.2345423698425293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,float16,0,1.9826128005981445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,4,128,1,fp8,fp8,0,1.2142368316650392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,fp8,0,1.4024368286132813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,fp8,0,1.2225024223327636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,float16,0,2.089135932922363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,float16,0,0.7910783767700196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,2,128,1,fp8,fp8,0,1.2151439666748047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,24,128,1,fp8,fp8,0,0.6976175785064698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,float16,0,0.742140817642212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,fp8,0,1.1358384132385253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,fp8,0,0.6657199859619141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,1,128,1,fp8,fp8,0,0.6638671875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,float16,0,0.7626351833343505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,fp8,0,0.6649680137634277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,2,128,1,fp8,fp8,0,0.6637728214263916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,8,128,1,fp8,fp8,0,1.237337589263916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,float16,0,0.7713903903961181
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,fp8,0,0.6654687881469726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,4,128,1,fp8,fp8,0,0.6641488075256348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,float16,0,0.7719168186187744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,fp8,0,0.6850800037384033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,8,128,1,fp8,fp8,0,0.6663343906402588
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,fp8,0,3.424871826171875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,1,128,1,fp8,fp8,0,3.3168704986572264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,float16,0,5.085764694213867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,float16,0,5.184963226318359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,fp8,0,3.4076671600341797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,2,128,1,fp8,fp8,0,3.535788726806641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,fp8,0,3.8031169891357424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,float16,0,5.403635025024414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,4,128,1,fp8,fp8,0,3.376073455810547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,float16,0,2.028955268859863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,float16,0,5.747244644165039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,fp8,0,3.81334228515625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,8,128,1,fp8,fp8,0,3.3198848724365235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,fp8,0,2.1381568908691406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,24,128,1,fp8,fp8,0,1.9960960388183593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,fp8,0,1.7029535293579101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,float16,0,2.3462448120117188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,1,128,1,fp8,fp8,0,1.7091552734375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,float16,0,1.9903295516967774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,fp8,0,2.291836738586426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,2,128,1,fp8,fp8,0,1.7262399673461915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,fp8,0,1.718275260925293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,float16,0,3.1057743072509765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,4,128,1,fp8,fp8,0,1.7129791259765625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,8,128,1,fp8,fp8,0,1.7514944076538086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,float16,0,1.0400639533996583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,fp8,0,0.9623472213745117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,24,128,1,fp8,fp8,0,0.9232848167419434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,float16,0,1.0337679862976075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,fp8,0,0.881436824798584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,1,128,1,fp8,fp8,0,1.1181344032287597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,float16,0,2.3038095474243163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,float16,0,0.9801424026489258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,fp8,0,0.9061727523803711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,fp8,0,2.4440383911132812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,2,128,1,fp8,fp8,0,0.8859248161315918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,float16,0,1.086251163482666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,fp8,0,0.8972672462463379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,4,128,1,fp8,fp8,0,0.8946319580078125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,float16,0,1.0088496208190918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,float16,0,0.5971759796142578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,fp8,0,0.5203936100006104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,8,128,1,fp8,fp8,0,0.9097567558288574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,fp8,0,0.9629280090332031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,24,128,1,fp8,fp8,0,0.5195536136627197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,fp8,0,0.5025392055511475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,1,128,1,fp8,fp8,0,0.49156641960144043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,fp8,0,0.49793438911437987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,2,128,1,fp8,fp8,0,0.48705282211303713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,float16,0,0.5595647811889648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,4,128,1,fp8,fp8,0,0.49901599884033204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,float16,0,0.5704160213470459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,fp8,0,0.49837441444396974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,float16,0,0.5730463981628418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,8,128,1,fp8,fp8,0,0.49996161460876465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,float16,0,0.560811185836792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,fp8,0,0.48767361640930174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,fp8,0,4.514380645751953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,1,128,1,fp8,fp8,0,4.401571273803711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,float16,0,6.675662231445313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,float16,0,6.516289520263672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,fp8,0,4.490596771240234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,2,128,1,fp8,fp8,0,4.749052810668945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,fp8,0,4.48535041809082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,float16,0,7.820855712890625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,4,128,1,fp8,fp8,0,4.590238571166992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,fp8,0,4.522300720214844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,float16,0,3.2865150451660154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,float16,0,8.064891052246093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,8,128,1,fp8,fp8,0,4.845772933959961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,fp8,0,2.3185871124267576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,24,128,1,fp8,fp8,0,2.6943103790283205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,fp8,0,2.2627695083618162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,float16,0,3.5988433837890623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,1,128,1,fp8,fp8,0,2.199176025390625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,float16,0,3.1323055267333983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,fp8,0,2.6259456634521485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,2,128,1,fp8,fp8,0,2.691526412963867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,float16,0,3.166904067993164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,fp8,0,2.457579231262207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,4,128,1,fp8,fp8,0,2.485183906555176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,float16,0,2.8150495529174804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,fp8,0,2.599537658691406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,float16,0,1.3975695610046386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,8,128,1,fp8,fp8,0,2.296436882019043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,24,128,1,fp8,fp8,0,1.208176040649414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,fp8,0,1.564577579498291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,fp8,0,1.3122912406921388
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,float16,0,1.9295503616333007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,float16,0,1.3537343978881835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,fp8,0,1.17335844039917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,2,128,1,fp8,fp8,0,1.4321151733398438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,fp8,0,1.1453871726989746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,float16,0,1.7808208465576172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,4,128,1,fp8,fp8,0,1.1307616233825684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,1,128,1,fp8,fp8,0,1.1286911964416504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,float16,0,1.3443903923034668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,8,128,1,fp8,fp8,0,1.125823974609375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,float16,0,0.7182735919952392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,fp8,0,1.6404895782470703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,fp8,0,0.7214704036712647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,24,128,1,fp8,fp8,0,0.6631648063659668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,float16,0,0.6665103912353516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,fp8,0,0.6835103988647461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,1,128,1,fp8,fp8,0,0.6291247844696045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,float16,0,0.6799568176269531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,fp8,0,0.7150800228118896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,2,128,1,fp8,fp8,0,0.6588912010192871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,fp8,0,0.6294288158416748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,4,128,1,fp8,fp8,0,0.6304207801818847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,float16,0,0.7001327991485595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,fp8,0,0.6111695766448975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,8,128,1,fp8,fp8,0,0.6030960083007812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,float16,0,0.409932804107666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,fp8,0,0.36007521152496336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,24,128,1,fp8,fp8,0,0.35845921039581297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,fp8,0,0.3409231901168823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,float16,0,0.38282079696655275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,1,128,1,fp8,fp8,0,0.3402992010116577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,float16,0,0.43299198150634766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,fp8,0,0.3397840023040771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,2,128,1,fp8,fp8,0,0.3508336067199707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,float16,0,0.3801599979400635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,float16,0,0.6950160026550293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,4,128,1,fp8,fp8,0,0.3409872055053711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,float16,0,0.39300320148468015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,fp8,0,0.34121921062469485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,8,128,1,fp8,fp8,0,0.3451616048812866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,fp8,0,0.3439264059066772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,float16,0,2.8784671783447267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,fp8,0,2.5676128387451174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,fp8,0,2.600129508972168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,2,128,1,fp8,fp8,0,2.535161590576172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,float16,0,4.4671775817871096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,fp8,0,2.5358303070068358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,float16,0,3.464263916015625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,1,128,1,fp8,fp8,0,2.5814592361450197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,4,128,1,fp8,fp8,0,2.837351989746094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,8,128,1,fp8,fp8,0,2.55010871887207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,float16,0,1.598846435546875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,float16,0,3.810995101928711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,fp8,0,2.9105199813842773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,fp8,0,1.3471183776855469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,fp8,0,1.5775376319885255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,24,128,1,fp8,fp8,0,1.9360448837280273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,float16,0,1.9114416122436524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,1,128,1,fp8,fp8,0,1.3286928176879882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,float16,0,1.4762160301208496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,fp8,0,1.3097439765930177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,2,128,1,fp8,fp8,0,1.3764847755432128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,float16,0,1.468545627593994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,4,128,1,fp8,fp8,0,1.305247974395752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,fp8,0,1.9180719375610351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,float16,0,1.4549344062805176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,fp8,0,1.4792176246643067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,float16,0,0.8597760200500488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,8,128,1,fp8,fp8,0,1.328643226623535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,24,128,1,fp8,fp8,0,0.7611408233642578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,fp8,0,0.7005263805389405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,1,128,1,fp8,fp8,0,0.7064703941345215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,float16,0,0.7533664226531982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,fp8,0,0.6944896221160889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,2,128,1,fp8,fp8,0,0.8056559562683105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,float16,0,0.7726640224456787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,fp8,0,0.7268720149993897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,fp8,0,0.8534031867980957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,4,128,1,fp8,fp8,0,0.6925663948059082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,float16,0,0.7831727981567382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,float16,0,0.803707218170166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,8,128,1,fp8,fp8,0,0.6924784183502197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,float16,0,0.45589919090270997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,fp8,0,0.4089536190032959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,24,128,1,fp8,fp8,0,0.40603837966918943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,float16,0,0.41980319023132323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,fp8,0,0.3817023992538452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,1,128,1,fp8,fp8,0,0.38058559894561766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,float16,0,0.41615681648254393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,fp8,0,0.381168007850647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,2,128,1,fp8,fp8,0,0.38114080429077146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,float16,0,0.4189295768737793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,fp8,0,0.3820255994796753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,4,128,1,fp8,fp8,0,0.38121919631958007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,float16,0,0.4250495910644531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,fp8,0,0.3837663888931274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,8,128,1,fp8,fp8,0,0.3798288106918335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,float16,0,0.2569711923599243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,fp8,0,0.2366624116897583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,24,128,1,fp8,fp8,0,0.23326559066772462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,float16,0,0.23747520446777343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,fp8,0,0.21903839111328124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,1,128,1,fp8,fp8,0,0.2172976016998291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,fp8,0,0.6829984188079834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,fp8,0,0.21954560279846191
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,float16,0,0.23759839534759522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,fp8,0,0.2191312074661255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,4,128,1,fp8,fp8,0,0.215665602684021
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,float16,0,0.2483680009841919
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,fp8,0,0.21523358821868896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,8,128,1,fp8,fp8,0,0.21942079067230225
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,float16,0,0.23651199340820311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,2,128,1,fp8,fp8,0,0.21828479766845704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,fp8,0,2.484163284301758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,float16,0,3.0671472549438477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,1,128,1,fp8,fp8,0,2.530633544921875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,fp8,0,2.528438377380371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,float16,0,3.113852882385254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,2,128,1,fp8,fp8,0,2.5706064224243166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,fp8,0,2.4959184646606447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,float16,0,3.9677761077880858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,4,128,1,fp8,fp8,0,2.693630409240723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,float16,0,3.5428592681884767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,float16,0,1.5972528457641602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,fp8,0,2.5128623962402346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,fp8,0,1.4853343963623047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,8,128,1,fp8,fp8,0,2.8478832244873047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,float16,0,1.387399959564209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,24,128,1,fp8,fp8,0,1.695305633544922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,fp8,0,1.2582672119140625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,1,128,1,fp8,fp8,0,1.2612832069396973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,float16,0,1.357040023803711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,fp8,0,1.2565631866455078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,2,128,1,fp8,fp8,0,1.2591391563415528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,fp8,0,1.2533935546875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,float16,0,1.5534751892089844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,4,128,1,fp8,fp8,0,1.2587679862976073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,float16,0,1.4120368003845214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,fp8,0,1.6546575546264648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,8,128,1,fp8,fp8,0,1.273782444000244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,float16,0,1.1274319648742677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,fp8,0,0.7355904102325439
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,24,128,1,fp8,fp8,0,0.7516608238220215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,fp8,0,0.6630959987640381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,1,128,1,fp8,fp8,0,0.6593135833740235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,float16,0,0.9733360290527344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,float16,0,0.7110735893249511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,2,128,1,fp8,fp8,0,0.8102975845336914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,float16,0,0.7233712196350097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,4,128,1,fp8,fp8,0,0.6518127918243408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,fp8,0,0.6608672142028809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,float16,0,0.8383135795593262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,fp8,0,0.6497920036315918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,8,128,1,fp8,fp8,0,0.660644817352295
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,float16,0,0.4936063766479492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,fp8,0,0.38557279109954834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,24,128,1,fp8,fp8,0,0.3923712015151978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,float16,0,0.44258880615234375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,fp8,0,0.35333919525146484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,1,128,1,fp8,fp8,0,0.3596208095550537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,float16,0,0.41784000396728516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,fp8,0,0.35207679271698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,fp8,0,0.6558608055114746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,2,128,1,fp8,fp8,0,0.3595072031021118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,float16,0,0.4225471973419189
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,4,128,1,fp8,fp8,0,0.35305280685424806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,float16,0,0.40348639488220217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,fp8,0,0.41040959358215334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,8,128,1,fp8,fp8,0,0.3515023946762085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,fp8,0,0.22146399021148683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,24,128,1,fp8,fp8,0,0.21629600524902343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,float16,0,0.2145359992980957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,fp8,0,0.19734079837799073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,1,128,1,fp8,fp8,0,0.19941920042037964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,float16,0,0.21345279216766358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,fp8,0,0.1997375965118408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,2,128,1,fp8,fp8,0,0.1972864031791687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,float16,0,0.21929919719696045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,fp8,0,0.19721440076828003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,4,128,1,fp8,fp8,0,0.19922879934310914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,float16,0,0.22035839557647705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,fp8,0,0.20017120838165284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,8,128,1,fp8,fp8,0,0.19743039608001708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,float16,0,0.14052319526672363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,fp8,0,0.126692795753479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,24,128,1,fp8,fp8,0,0.12761119604110718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,float16,0,0.12423520088195801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,fp8,0,0.11834399700164795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,fp8,0,0.35248000621795655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,1,128,1,fp8,fp8,0,0.11616959571838378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,float16,0,0.12658560276031494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,fp8,0,0.1166640043258667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,float16,0,0.12752959728240967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,fp8,0,0.11668800115585327
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,4,128,1,fp8,fp8,0,0.11804000139236451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,float16,0,0.12742400169372559
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,float16,0,0.2433919906616211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,8,128,1,fp8,fp8,0,0.11652959585189819
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,fp8,0,1.516766357421875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,float16,0,1.6215423583984374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,2,128,1,fp8,fp8,0,0.11814240217208863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,1,128,1,fp8,fp8,0,1.5169119834899902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,fp8,0,0.11886240243911743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,float16,0,1.6185823440551759
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,fp8,0,1.5163776397705078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,2,128,1,fp8,fp8,0,1.5160143852233887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,float16,0,1.6263055801391602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,fp8,0,1.6933456420898438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,4,128,1,fp8,fp8,0,1.5246447563171386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,float16,0,1.8207712173461914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,fp8,0,1.5570240020751953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,float16,0,0.996396827697754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,8,128,1,fp8,fp8,0,1.5786016464233399
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,fp8,0,0.9910688400268555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,24,128,1,fp8,fp8,0,0.8943856239318848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,float16,0,1.012275218963623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,fp8,0,0.8490608215332032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,1,128,1,fp8,fp8,0,0.7847568035125733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,float16,0,0.8285344123840332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,fp8,0,1.0135184288024903
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,float16,0,0.823908805847168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,fp8,0,0.8025168418884278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,4,128,1,fp8,fp8,0,0.7814847946166992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,fp8,0,0.8081168174743653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,float16,0,0.9252079963684082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,8,128,1,fp8,fp8,0,0.7774896144866943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,fp8,0,0.5384655952453613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,24,128,1,fp8,fp8,0,0.46586880683898924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,float16,0,0.45408000946044924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,fp8,0,0.4116079807281494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,1,128,1,fp8,fp8,0,0.41209921836853025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,2,128,1,fp8,fp8,0,0.7888800144195557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,float16,0,0.4301231861114502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,fp8,0,0.4194528102874756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,2,128,1,fp8,fp8,0,0.4107840061187744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,float16,0,0.45074238777160647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,fp8,0,0.4099936008453369
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,float16,0,0.5136528015136719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,4,128,1,fp8,fp8,0,0.4164991855621338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,float16,0,0.45060319900512696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,fp8,0,0.41713762283325195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,8,128,1,fp8,fp8,0,0.41527361869812013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,float16,0,0.2741487979888916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,fp8,0,0.2554847955703735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,24,128,1,fp8,fp8,0,0.2544895887374878
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,float16,0,0.23757119178771974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,fp8,0,0.22736639976501466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,1,128,1,fp8,fp8,0,0.22836480140686036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,float16,0,0.2366879940032959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,fp8,0,0.2270047903060913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,2,128,1,fp8,fp8,0,0.2294095993041992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,float16,0,0.24042561054229736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,4,128,1,fp8,fp8,0,0.22806880474090577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,float16,0,0.24834399223327636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,8,128,1,fp8,fp8,0,0.22404160499572753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,fp8,0,0.22767040729522706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,float16,0,0.15464320182800292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,fp8,0,0.14426239728927612
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,24,128,1,fp8,fp8,0,0.14221279621124266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,float16,0,0.13704639673233032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,fp8,0,0.1276975989341736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,float16,0,0.13712480068206787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,1,128,1,fp8,fp8,0,0.1286960005760193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,fp8,0,0.12686560153961182
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,2,128,1,fp8,fp8,0,0.1292464017868042
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,fp8,0,0.127839994430542
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,float16,0,0.13637919425964357
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,4,128,1,fp8,fp8,0,0.1292207956314087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,fp8,0,0.12876479625701903
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,float16,0,0.09480159878730773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,fp8,0,0.0871999979019165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,24,128,1,fp8,fp8,0,0.08791199922561646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,float16,0,0.08504160046577454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,fp8,0,0.22696321010589598
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,fp8,0,0.08032000064849854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,float16,0,0.08454560041427613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,fp8,0,0.08027679920196533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,2,128,1,fp8,fp8,0,0.08046079874038696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,float16,0,0.08568000197410583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,fp8,0,0.08043519854545593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,4,128,1,fp8,fp8,0,0.08033279776573181
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,float16,0,0.08803359866142273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,fp8,0,0.0802944004535675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,float16,0,0.14139360189437866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,8,128,1,fp8,fp8,0,0.08094080090522766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,8,128,1,fp8,fp8,0,0.1294384002685547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,1,128,1,fp8,fp8,0,0.08120319843292237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,float16,0,1.5773695945739745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,fp8,0,1.5560400009155273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,1,128,1,fp8,fp8,0,1.5635680198669433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,float16,0,1.5768400192260743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,fp8,0,1.622060775756836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,2,128,1,fp8,fp8,0,1.5523056030273437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,float16,0,1.6323823928833008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,fp8,0,1.579435157775879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,4,128,1,fp8,fp8,0,1.5498847961425781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,float16,0,1.6670272827148438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,fp8,0,1.7420095443725585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,float16,0,1.0155903816223144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,8,128,1,fp8,fp8,0,1.5618207931518555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,fp8,0,1.3096384048461913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,24,128,1,fp8,fp8,0,0.9550959587097168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,float16,0,0.8230815887451172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,fp8,0,0.8047023773193359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,1,128,1,fp8,fp8,0,0.7986720085144043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,float16,0,0.8214048385620117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,2,128,1,fp8,fp8,0,0.8024592399597168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,float16,0,0.818166446685791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,fp8,0,0.8017184257507324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,4,128,1,fp8,fp8,0,0.7990064144134521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,float16,0,0.91636962890625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,8,128,1,fp8,fp8,0,0.7969727993011475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,fp8,0,0.8351311683654785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,float16,0,0.5835792064666748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,fp8,0,0.4880047798156738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,float16,0,0.4488175868988037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,fp8,0,0.4197807788848877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,1,128,1,fp8,fp8,0,0.4263807773590088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,fp8,0,0.793398380279541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,float16,0,0.42604961395263674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,fp8,0,0.4156383991241455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,2,128,1,fp8,fp8,0,0.4133887767791748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,float16,0,0.43789119720458985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,fp8,0,0.4149919986724854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,4,128,1,fp8,fp8,0,0.41237602233886717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,float16,0,0.4543295860290527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,fp8,0,0.41193280220031736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,8,128,1,fp8,fp8,0,0.4104191780090332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,fp8,0,0.25789120197296145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,24,128,1,fp8,fp8,0,0.2581423997879028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,float16,0,0.22944319248199463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,24,128,1,fp8,fp8,0,0.48619837760925294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,fp8,0,0.2232448101043701
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,1,128,1,fp8,fp8,0,0.22545440196990968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,float16,0,0.22848479747772216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,fp8,0,0.22532320022583008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,2,128,1,fp8,fp8,0,0.2235759973526001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,float16,0,0.23790719509124755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,fp8,0,0.2225343942642212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,4,128,1,fp8,fp8,0,0.2248431921005249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,float16,0,0.2405855894088745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,fp8,0,0.22440800666809083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,8,128,1,fp8,fp8,0,0.221612811088562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,float16,0,0.15658719539642335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,float16,0,0.28059840202331543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,24,128,1,fp8,fp8,0,0.14398720264434814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,float16,0,0.12838879823684693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,fp8,0,0.12387200593948364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,1,128,1,fp8,fp8,0,0.12411680221557617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,float16,0,0.1302783966064453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,fp8,0,0.12403680086135864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,2,128,1,fp8,fp8,0,0.12459360361099243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,float16,0,0.1333024024963379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,4,128,1,fp8,fp8,0,0.1236847996711731
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,float16,0,0.13778719902038575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,fp8,0,0.1242400050163269
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,8,128,1,fp8,fp8,0,0.12443840503692627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,float16,0,0.09056159853935242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,fp8,0,0.0837664008140564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,24,128,1,fp8,fp8,0,0.0828000009059906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,float16,0,0.07624160051345825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,fp8,0,0.07375199794769287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,1,128,1,fp8,fp8,0,0.07429919838905334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,float16,0,0.0751200020313263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,fp8,0,0.07460160255432129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,fp8,0,0.1425168037414551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,fp8,0,0.07304959893226623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,4,128,1,fp8,fp8,0,0.07367039918899536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,float16,0,0.07918559908866882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,fp8,0,0.0734287977218628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,8,128,1,fp8,fp8,0,0.07314079999923706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,float16,0,0.055473601818084715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,fp8,0,0.05549439787864685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,24,128,1,fp8,fp8,0,0.05559679865837097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,float16,0,0.05156480073928833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,fp8,0,0.050432002544403075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,1,128,1,fp8,fp8,0,0.05020639896392822
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,float16,0,0.05143359899520874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,fp8,0,0.05005919933319092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,2,128,1,fp8,fp8,0,0.051209598779678345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,float16,0,0.052713602781295776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,fp8,0,0.05089920163154602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,4,128,1,fp8,fp8,0,0.05074399709701538
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,2,128,1,fp8,fp8,0,0.07418400049209595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,float16,0,0.053686398267745974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,fp8,0,0.050676798820495604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,fp8,0,0.12524640560150146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,float16,0,0.9790431976318359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,fp8,0,1.0080880165100097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,1,128,1,fp8,fp8,0,0.9999520301818847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,float16,0,0.9955920219421387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,float16,0,0.07745760083198547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,8,128,1,fp8,fp8,0,0.050374400615692136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,2,128,1,fp8,fp8,0,1.0201711654663086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,float16,0,1.0208000183105468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,fp8,0,0.9960047721862793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,4,128,1,fp8,fp8,0,1.0062623977661134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,float16,0,1.0842831611633301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,fp8,0,0.9950032234191895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,float16,0,0.6785520076751709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,fp8,0,0.6191952228546143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,8,128,1,fp8,fp8,0,1.0038640022277832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,fp8,0,0.9983728408813477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,float16,0,0.5119103908538818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,24,128,1,fp8,fp8,0,0.695798397064209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,fp8,0,0.5240064144134522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,1,128,1,fp8,fp8,0,0.5163343906402588
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,float16,0,0.5098544120788574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,fp8,0,0.584449577331543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,2,128,1,fp8,fp8,0,0.5213295936584472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,float16,0,0.5212512016296387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,fp8,0,0.5140207767486572
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,4,128,1,fp8,fp8,0,0.5210207939147949
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,fp8,0,0.5111343860626221
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,float16,0,0.3522559881210327
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,8,128,1,fp8,fp8,0,0.5128719806671143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,fp8,0,0.32375359535217285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,float16,0,0.27174398899078367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,24,128,1,fp8,fp8,0,0.32670719623565675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,fp8,0,0.2714031934738159
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,1,128,1,fp8,fp8,0,0.27221438884735105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,float16,0,0.27350239753723143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,fp8,0,0.2716991901397705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,2,128,1,fp8,fp8,0,0.27179679870605467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,float16,0,0.281278395652771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,fp8,0,0.27091360092163086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,4,128,1,fp8,fp8,0,0.2705024003982544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,float16,0,0.29587841033935547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,fp8,0,0.27002079486846925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,8,128,1,fp8,fp8,0,0.27104160785675047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,float16,0,0.18643200397491455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,fp8,0,0.17550079822540282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,24,128,1,fp8,fp8,0,0.1750175952911377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,float16,0,0.1490928053855896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,fp8,0,0.14641120433807372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,1,128,1,fp8,fp8,0,0.1483199954032898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,float16,0,0.1476207971572876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,fp8,0,0.14828640222549438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,2,128,1,fp8,fp8,0,0.14671039581298828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,float16,0,0.5625296115875245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,float16,0,0.15310399532318114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,4,128,1,fp8,fp8,0,0.14839359521865844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,float16,0,0.15977120399475098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,fp8,0,0.1483024001121521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,8,128,1,fp8,fp8,0,0.1475119948387146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,float16,0,0.10591520071029663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,fp8,0,0.09847040176391601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,24,128,1,fp8,fp8,0,0.09934719800949096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,float16,0,0.08392159938812256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,fp8,0,0.08377439975738525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,1,128,1,fp8,fp8,0,0.08324480056762695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,float16,0,0.08483520150184631
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,fp8,0,0.08291199803352356
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,float16,0,0.08641279935836792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,fp8,0,0.08323360085487366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,4,128,1,fp8,fp8,0,0.08281760215759278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,float16,0,0.09061440229415893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,fp8,0,0.08242239952087402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,8,128,1,fp8,fp8,0,0.08243039846420289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,float16,0,0.06060799956321716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,fp8,0,0.1470047950744629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,fp8,0,0.05780799984931946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,24,128,1,fp8,fp8,0,0.05885760188102722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,fp8,0,0.051500797271728516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,1,128,1,fp8,fp8,0,0.05152159929275513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,float16,0,0.05296639800071716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,fp8,0,0.05153599977493286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,2,128,1,fp8,fp8,0,0.052052801847457884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,float16,0,0.05363199710845947
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,fp8,0,0.05246719717979431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,2,128,1,fp8,fp8,0,0.0842415988445282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,float16,0,0.05605279803276062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,8,128,1,fp8,fp8,0,0.05264639854431152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,float16,0,0.03547039926052094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,fp8,0,0.03530240058898926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,24,128,1,fp8,fp8,0,0.03537279963493347
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,float16,0,0.03320960104465485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,fp8,0,0.03189440071582794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,1,128,1,fp8,fp8,0,0.03240959942340851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,float16,0,0.03274880051612854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,float16,0,0.05243840217590332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,fp8,0,0.03145279884338379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,float16,0,0.0333983987569809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,fp8,0,0.033129599690437314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,4,128,1,fp8,fp8,0,0.03287360072135925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,float16,0,0.03324800133705139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,fp8,0,0.03120959997177124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,8,128,1,fp8,fp8,0,0.03132959902286529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,fp8,0,0.053067201375961305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,float16,0,1.0519311904907227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,fp8,0,1.102849578857422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,2,128,1,fp8,fp8,0,0.031014400720596313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,float16,0,1.051095962524414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,1,128,1,fp8,fp8,0,1.2380512237548829
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,4,128,1,fp8,fp8,0,0.05212000012397766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,fp8,0,1.0908127784729005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,float16,0,1.0822624206542968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,2,128,1,fp8,fp8,0,1.3814096450805664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,fp8,0,1.090510368347168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,4,128,1,fp8,fp8,0,1.0900768280029296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,float16,0,1.1592000007629395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,fp8,0,1.0963007926940918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,float16,0,0.7547808170318604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,8,128,1,fp8,fp8,0,1.0890080451965332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,fp8,0,0.7103759765625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,24,128,1,fp8,fp8,0,0.7064896106719971
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,float16,0,0.5378159999847412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,fp8,0,0.5606815814971924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,1,128,1,fp8,fp8,0,0.5675439834594727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,float16,0,0.5455183982849121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,fp8,0,0.5600527763366699
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,2,128,1,fp8,fp8,0,0.5606351852416992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,float16,0,0.5590447902679443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,fp8,0,0.5634960174560547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,4,128,1,fp8,fp8,0,0.5597904205322266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,float16,0,0.5884768009185791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,fp8,0,0.5613711833953857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,float16,0,0.3884495973587036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,8,128,1,fp8,fp8,0,0.5597008228302002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,fp8,0,0.3625439882278442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,float16,0,0.2827104091644287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,24,128,1,fp8,fp8,0,0.3657871961593628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,1,128,1,fp8,fp8,0,0.29255199432373047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,fp8,0,0.29653120040893555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,float16,0,0.28112640380859377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,fp8,0,0.2969088077545166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,float16,0,0.2950704097747803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,fp8,0,0.29061601161956785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,4,128,1,fp8,fp8,0,0.29415359497070315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,float16,0,0.3144448041915894
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,fp8,0,0.29345600605010985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,8,128,1,fp8,fp8,0,0.29235360622406004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,float16,0,0.20614559650421144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,fp8,0,0.19463839530944824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,24,128,1,fp8,fp8,0,0.19454400539398192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,float16,0,0.15528000593185426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,fp8,0,0.15782400369644164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,1,128,1,fp8,fp8,0,0.15877439975738525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,float16,0,0.15618079900741577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,2,128,1,fp8,fp8,0,0.16009119749069214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,float16,0,0.15815999507904052
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,fp8,0,0.15742239952087403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,4,128,1,fp8,fp8,0,0.1572975993156433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,2,128,1,fp8,fp8,0,0.2918384075164795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,float16,0,0.16848479509353637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,fp8,0,0.1554640054702759
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,8,128,1,fp8,fp8,0,0.1576159954071045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,float16,0,0.11135519742965698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,fp8,0,0.10659840106964111
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,24,128,1,fp8,fp8,0,0.10517280101776123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,float16,0,0.08560799956321716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,fp8,0,0.0855023980140686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,1,128,1,fp8,fp8,0,0.0863919973373413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,float16,0,0.0852832019329071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,fp8,0,0.0860592007637024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,2,128,1,fp8,fp8,0,0.08543040156364441
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,float16,0,0.08965920209884644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,fp8,0,0.08545119762420654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,4,128,1,fp8,fp8,0,0.0866047978401184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,float16,0,0.09332960247993469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,fp8,0,0.08735679984092712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,8,128,1,fp8,fp8,0,0.08689119815826415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,float16,0,0.06426399946212769
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,fp8,0,0.1576815962791443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,fp8,0,0.06074079871177673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,24,128,1,fp8,fp8,0,0.061185598373413086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,fp8,0,0.0517903983592987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,float16,0,0.05212640166282654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,1,128,1,fp8,fp8,0,0.05244160294532776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,float16,0,0.05109440088272095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,fp8,0,0.05242879986763001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,2,128,1,fp8,fp8,0,0.05222079753875732
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,fp8,0,0.05265759825706482
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,4,128,1,fp8,fp8,0,0.051622402667999265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,fp8,0,0.05216159820556641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,8,128,1,fp8,fp8,0,0.051686400175094606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,float16,0,0.03958080112934113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,fp8,0,0.039340800046920775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,24,128,1,fp8,fp8,0,0.040443199872970584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,float16,0,0.035308799147605895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,fp8,0,0.03578239977359772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,1,128,1,fp8,fp8,0,0.03500959873199463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,float16,0,0.03592160046100616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,fp8,0,0.03528000116348266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,2,128,1,fp8,fp8,0,0.03601599931716919
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,float16,0,0.03580319881439209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,fp8,0,0.0367680013179779
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,float16,0,0.037503999471664426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,fp8,0,0.0365664005279541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,8,128,1,fp8,fp8,0,0.036664000153541564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,float16,0,0.026927998661994933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,fp8,0,0.02699359953403473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,24,128,1,fp8,fp8,0,0.027292799949645997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,float16,0,0.02492000013589859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,fp8,0,0.024883200228214265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,1,128,1,fp8,fp8,0,0.02494720071554184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,float16,0,0.024952000379562377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,float16,0,0.05181440114974976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,2,128,1,fp8,fp8,0,0.024883200228214265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,float16,0,0.02481119930744171
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,fp8,0,0.024751999974250795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,4,128,1,fp8,fp8,0,0.024766400456428528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,float16,0,0.02487040013074875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,fp8,0,0.0247856006026268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,8,128,1,fp8,fp8,0,0.024873599410057068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,4,128,1,fp8,fp8,0,0.03539359867572785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,1,128,1,float16,float16,0,0.7748079776763916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,1,128,1,float16,fp8,0,0.8610688209533691
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,1,128,1,fp8,fp8,0,0.861622428894043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,fp8,0,0.024827200174331664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,2,128,1,float16,float16,0,0.7727327823638916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,float16,0,0.054416000843048096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,2,128,1,float16,fp8,0,0.8590096473693848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,2,128,1,fp8,fp8,0,0.8640175819396972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,4,128,1,float16,fp8,0,0.8654911994934082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,4,128,1,fp8,fp8,0,0.8573488235473633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,8,128,1,float16,float16,0,0.8985664367675781
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,8,128,1,float16,fp8,0,0.8633312225341797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,8,128,1,fp8,fp8,0,0.8551615715026856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,24,128,1,float16,float16,0,0.660916805267334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,24,128,1,float16,fp8,0,0.5836112022399902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,24,128,1,fp8,fp8,0,0.5790800094604492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,1,128,1,float16,fp8,0,0.44832639694213866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,1,128,1,float16,float16,0,0.43996639251708985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,1,128,1,fp8,fp8,0,0.44176321029663085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,2,128,1,float16,float16,0,0.4044816017150879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,2,128,1,float16,fp8,0,0.44462881088256834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,2,128,1,fp8,fp8,0,0.4393919944763184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,4,128,1,float16,float16,0,0.8078144073486329
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,4,128,1,float16,float16,0,0.4219823837280273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,4,128,1,float16,fp8,0,0.4418655872344971
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,4,128,1,fp8,fp8,0,0.43779358863830564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,8,128,1,float16,float16,0,0.46033601760864257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,8,128,1,float16,fp8,0,0.4404287815093994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,24,128,1,float16,float16,0,0.3193279981613159
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,24,128,1,float16,fp8,0,0.30127520561218263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,24,128,1,fp8,fp8,0,0.30192320346832274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,1,128,1,float16,float16,0,0.21503360271453859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,1,128,1,float16,fp8,0,0.23376319408416749
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,1,128,1,fp8,fp8,0,0.23349759578704835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,2,128,1,float16,fp8,0,0.2317823886871338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,2,128,1,fp8,fp8,0,0.2319808006286621
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,4,128,1,float16,float16,0,0.22215039730072023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,4,128,1,float16,fp8,0,0.23061439990997315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,4,128,1,fp8,fp8,0,0.23053760528564454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,8,128,1,fp8,fp8,0,0.43619680404663086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,8,128,1,float16,float16,0,0.24078879356384278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,8,128,1,float16,fp8,0,0.22715361118316652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,24,128,1,float16,float16,0,0.16562880277633668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,8,128,1,fp8,fp8,0,0.23011360168457032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,24,128,1,float16,fp8,0,0.1583456039428711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,24,128,1,fp8,fp8,0,0.16005760431289673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,1,128,1,float16,float16,0,0.11373759508132934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,2,128,1,float16,float16,0,0.21283040046691895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,1,128,1,float16,fp8,0,0.12447359561920165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,1,128,1,fp8,fp8,0,0.1221392035484314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,2,128,1,float16,float16,0,0.11536799669265747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,2,128,1,float16,fp8,0,0.12217600345611572
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,4,128,1,float16,float16,0,0.11924639940261841
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,4,128,1,float16,fp8,0,0.12346080541610718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,8,128,1,float16,float16,0,0.12890080213546753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,8,128,1,float16,fp8,0,0.12330399751663208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,8,128,1,fp8,fp8,0,0.12182559967041015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,24,128,1,float16,float16,0,0.09021440148353577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,24,128,1,float16,fp8,0,0.08623520135879517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,24,128,1,fp8,fp8,0,0.08661280274391174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,1,128,1,float16,float16,0,0.0620576024055481
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,1,128,1,float16,fp8,0,0.0675599992275238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,1,128,1,fp8,fp8,0,0.0657584011554718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,2,128,1,float16,float16,0,0.0641152024269104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,2,128,1,float16,fp8,0,0.06599040031433105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,2,128,1,fp8,fp8,0,0.06714720129966736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,4,128,1,float16,float16,0,0.06595519781112671
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,4,128,1,float16,fp8,0,0.06779519915580749
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,4,128,1,fp8,fp8,0,0.06594240069389343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,8,128,1,float16,float16,0,0.07235999703407288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,8,128,1,float16,fp8,0,0.06689599752426148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,8,128,1,fp8,fp8,0,0.06782559752464294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,24,128,1,float16,fp8,0,0.04966880083084106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,24,128,1,fp8,fp8,0,0.05025759935379028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,1,128,1,float16,float16,0,0.0392304003238678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,1,128,1,float16,fp8,0,0.04121760129928589
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,1,128,1,fp8,fp8,0,0.04120959937572479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,2,128,1,float16,float16,0,0.039062398672103885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,2,128,1,float16,fp8,0,0.04118080139160156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,2,128,1,fp8,fp8,0,0.1242143988609314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,2,128,1,fp8,fp8,0,0.041222399473190306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,4,128,1,float16,float16,0,0.04065439999103546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,4,128,1,fp8,fp8,0,0.12242239713668823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,4,128,1,fp8,fp8,0,0.04132319986820221
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,8,128,1,float16,fp8,0,0.041361600160598755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,8,128,1,fp8,fp8,0,0.041345599293708804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,24,128,1,float16,float16,0,0.03092319965362549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,24,128,1,float16,fp8,0,0.03294560015201568
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,24,128,1,fp8,fp8,0,0.032892799377441405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,1,128,1,float16,float16,0,0.02696000039577484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,1,128,1,float16,fp8,0,0.028774398565292358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,1,128,1,fp8,fp8,0,0.028748801350593566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,2,128,1,float16,float16,0,0.026995199918746948
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,24,128,1,float16,float16,0,0.05148640275001526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,2,128,1,float16,fp8,0,0.028944000601768494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,2,128,1,fp8,fp8,0,0.027935999631881713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,4,128,1,float16,float16,0,0.0289247989654541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,4,128,1,float16,fp8,0,0.027342399954795836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,4,128,1,fp8,fp8,0,0.028788799047470094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,8,128,1,float16,float16,0,0.028935998678207397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,8,128,1,float16,fp8,0,0.029271999001502992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,24,128,1,float16,float16,0,0.020848000049591066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,24,128,1,float16,fp8,0,0.02252960056066513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,24,128,1,fp8,fp8,0,0.021180799603462218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,1,128,1,float16,float16,0,0.01883520036935806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,1,128,1,float16,fp8,0,0.018782399594783783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,4,128,1,float16,fp8,0,0.04122720062732697
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,2,128,1,float16,float16,0,0.018743999302387238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,8,128,1,float16,float16,0,0.04291360080242157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,2,128,1,float16,fp8,0,0.01886720061302185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,2,128,1,fp8,fp8,0,0.01863040030002594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,4,128,1,float16,float16,0,0.018774400651454925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,4,128,1,float16,fp8,0,0.01870719939470291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,4,128,1,fp8,fp8,0,0.0188511997461319
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,8,128,1,float16,float16,0,0.020603199303150178
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,8,128,1,float16,fp8,0,0.018806399405002595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,8,128,1,fp8,fp8,0,0.018617600202560425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,24,128,1,float16,float16,0,0.018961599469184874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,24,128,1,float16,fp8,0,0.01868959963321686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,24,128,1,fp8,fp8,0,0.019512000679969787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,1,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,1,128,1,float16,fp8,0,0.01860480010509491
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,1,128,1,fp8,fp8,0,0.019064000248908995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,2,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,2,128,1,float16,fp8,0,0.018700799345970152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,2,128,1,fp8,fp8,0,0.018750399351119995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,4,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,4,128,1,float16,fp8,0,0.01865600049495697
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,8,128,1,fp8,fp8,0,0.02889760136604309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,4,128,1,fp8,fp8,0,0.018649600446224213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,8,128,1,float16,float16,0,0.018739199638366698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,8,128,1,float16,fp8,0,0.01874080002307892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,8,128,1,fp8,fp8,0,0.01865919977426529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,1,128,1,float16,float16,0,0.32857599258422854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,1,128,1,float16,fp8,0,0.37747840881347655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,1,128,1,fp8,fp8,0,0.01889760047197342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,1,128,1,fp8,fp8,0,0.37757599353790283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,2,128,1,float16,float16,0,0.32736160755157473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,2,128,1,float16,fp8,0,0.37869439125061033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,2,128,1,fp8,fp8,0,0.37547519207000735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,4,128,1,float16,float16,0,0.3447999954223633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,4,128,1,float16,fp8,0,0.37719199657440183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,4,128,1,fp8,fp8,0,0.3747087955474854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,8,128,1,float16,fp8,0,0.3753423929214478
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,8,128,1,fp8,fp8,0,0.37227840423583985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,24,128,1,float16,float16,0,0.28167359828948973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,24,128,1,float16,fp8,0,0.26576321125030516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,1,128,1,float16,float16,0,0.17296639680862427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,1,128,1,float16,fp8,0,0.19686880111694335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,1,128,1,fp8,fp8,0,0.19665440320968627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,2,128,1,float16,float16,0,0.1727023959159851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,2,128,1,float16,fp8,0,0.19540319442749024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,4,128,1,float16,float16,0,0.18109120130538942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,2,128,1,fp8,fp8,0,0.19905760288238525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,8,128,1,float16,float16,0,0.3821135997772217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,4,128,1,float16,fp8,0,0.19502559900283814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,4,128,1,fp8,fp8,0,0.19839680194854736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,8,128,1,float16,float16,0,0.2004159927368164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,8,128,1,float16,fp8,0,0.1948799967765808
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,8,128,1,fp8,fp8,0,0.1975167989730835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,24,128,1,float16,fp8,0,0.14096319675445557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,1,128,1,float16,float16,0,0.09481599926948547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,24,128,1,fp8,fp8,0,0.14213119745254515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,24,128,1,fp8,fp8,0,0.26762239933013915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,1,128,1,float16,fp8,0,0.10523040294647217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,1,128,1,fp8,fp8,0,0.10728319883346557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,2,128,1,float16,float16,0,0.09719679951667785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,2,128,1,float16,fp8,0,0.10531840324401856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,4,128,1,float16,float16,0,0.10233279466629028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,2,128,1,fp8,fp8,0,0.10711840391159058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,4,128,1,float16,fp8,0,0.10598880052566528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,4,128,1,fp8,fp8,0,0.10671039819717407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,8,128,1,float16,float16,0,0.11146240234375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,8,128,1,float16,fp8,0,0.1054144024848938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,8,128,1,fp8,fp8,0,0.10688320398330689
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,24,128,1,float16,float16,0,0.08197439908981323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,24,128,1,float16,fp8,0,0.07813439965248108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,1,128,1,float16,fp8,0,0.05928639769554138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,1,128,1,fp8,fp8,0,0.05888320207595825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,2,128,1,float16,float16,0,0.055619198083877566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,2,128,1,float16,fp8,0,0.05891839861869812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,2,128,1,fp8,fp8,0,0.057918399572372437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,4,128,1,float16,float16,0,0.057601600885391235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,4,128,1,float16,fp8,0,0.05875200033187866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,4,128,1,fp8,fp8,0,0.05862399935722351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,8,128,1,float16,float16,0,0.06255840063095093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,24,128,1,float16,float16,0,0.14700160026550294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,8,128,1,float16,fp8,0,0.05933279991149902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,8,128,1,fp8,fp8,0,0.059622400999069215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,24,128,1,float16,float16,0,0.04524959921836853
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,24,128,1,float16,fp8,0,0.04328640103340149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,24,128,1,fp8,fp8,0,0.043270400166511534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,1,128,1,float16,float16,0,0.031323200464248656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,1,128,1,float16,fp8,0,0.034990400075912476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,2,128,1,float16,float16,0,0.0324752002954483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,2,128,1,float16,fp8,0,0.03499200046062469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,2,128,1,fp8,fp8,0,0.03500320017337799
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,4,128,1,float16,float16,0,0.0330704003572464
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,4,128,1,float16,fp8,0,0.03510720133781433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,24,128,1,fp8,fp8,0,0.07924960255622863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,4,128,1,fp8,fp8,0,0.03508639931678772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,8,128,1,float16,float16,0,0.035046398639678955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,8,128,1,float16,fp8,0,0.035016000270843506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,24,128,1,float16,float16,0,0.026876801252365114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,24,128,1,float16,fp8,0,0.029046401381492615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,24,128,1,fp8,fp8,0,0.028951999545097352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,1,128,1,float16,float16,0,0.023664000630378722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,1,128,1,float16,fp8,0,0.02484000027179718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,1,128,1,fp8,fp8,0,0.024772800505161285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,2,128,1,float16,float16,0,0.023928000032901763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,2,128,1,float16,fp8,0,0.02486719936132431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,2,128,1,fp8,fp8,0,0.02478879988193512
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,4,128,1,float16,float16,0,0.024719999730587007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,4,128,1,float16,fp8,0,0.0247856006026268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,4,128,1,fp8,fp8,0,0.02489600032567978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,8,128,1,float16,float16,0,0.024799999594688416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,1,128,1,fp8,fp8,0,0.03503519892692566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,8,128,1,float16,fp8,0,0.024753600358963013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,8,128,1,fp8,fp8,0,0.024792000651359558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,24,128,1,float16,float16,0,0.01849119961261749
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,24,128,1,float16,fp8,0,0.01871200054883957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,1,128,1,float16,float16,0,0.01703680008649826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,1,128,1,float16,float16,0,0.055801600217819214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,1,128,1,float16,fp8,0,0.016867199540138246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,8,128,1,fp8,fp8,0,0.034995201230049136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,1,128,1,fp8,fp8,0,0.01682240068912506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,2,128,1,float16,float16,0,0.016572800278663636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,2,128,1,fp8,fp8,0,0.016820800304412842
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,2,128,1,float16,fp8,0,0.016913600265979767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,4,128,1,float16,float16,0,0.016729600727558136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,4,128,1,fp8,fp8,0,0.016782400012016297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,4,128,1,float16,fp8,0,0.017083199322223665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,8,128,1,float16,float16,0,0.016920000314712524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,8,128,1,fp8,fp8,0,0.016710400581359863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,24,128,1,float16,float16,0,0.016803200542926788
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,24,128,1,float16,fp8,0,0.016676799952983858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,24,128,1,fp8,fp8,0,0.016846400499343873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,1,128,1,float16,float16,0,0.016497600078582763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,1,128,1,float16,fp8,0,0.01656000018119812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,1,128,1,fp8,fp8,0,0.016547200083732606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,2,128,1,float16,float16,0,0.016047999262809753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,2,128,1,float16,fp8,0,0.016582399606704712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,2,128,1,fp8,fp8,0,0.016540800034999848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,4,128,1,float16,float16,0,0.01650719940662384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,4,128,1,float16,fp8,0,0.016569599509239197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,24,128,1,fp8,fp8,0,0.0187376007437706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,4,128,1,fp8,fp8,0,0.016548800468444824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,8,128,1,float16,float16,0,0.016651199758052827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,8,128,1,float16,fp8,0,0.016492800414562227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,24,128,1,float16,float16,0,0.016590400040149687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,8,128,1,fp8,fp8,0,0.016625599563121797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,24,128,1,float16,fp8,0,0.016646400094032288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,24,128,1,fp8,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,1,128,1,float16,float16,0,0.014747199416160584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,1,128,1,float16,fp8,0,0.014865599572658539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,1,128,1,fp8,fp8,0,0.01478240042924881
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,2,128,1,float16,float16,0,0.014996799826622009
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,2,128,1,float16,fp8,0,0.015017600357532501
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,2,128,1,fp8,fp8,0,0.014972800016403198
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,4,128,1,float16,float16,0,0.014843200147151948
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,4,128,1,float16,fp8,0,0.014945599436759948
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,4,128,1,fp8,fp8,0,0.015080000460147857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,8,128,1,float16,float16,0,0.015363200008869171
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,8,128,1,float16,fp8,0,0.016502399742603303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,8,128,1,fp8,fp8,0,0.014801600575447082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,1,128,1,float16,float16,0,0.20361599922180176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,1,128,1,float16,fp8,0,0.22364799976348876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,1,128,1,fp8,fp8,0,0.22656641006469727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,2,128,1,float16,float16,0,0.19973599910736084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,2,128,1,float16,fp8,0,0.22607998847961425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,2,128,1,fp8,fp8,0,0.22397921085357667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,4,128,1,float16,float16,0,0.20924639701843262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,4,128,1,float16,fp8,0,0.22449760437011718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,8,128,1,float16,float16,0,0.2269376039505005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,8,128,1,fp8,fp8,0,0.2217103958129883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,24,128,1,float16,float16,0,0.15580159425735474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,24,128,1,float16,fp8,0,0.1521407961845398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,24,128,1,fp8,fp8,0,0.15258400440216063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,1,128,1,float16,float16,0,0.10676000118255616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,1,128,1,float16,fp8,0,0.11783839464187622
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,1,128,1,fp8,fp8,0,0.11768319606781005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,8,128,1,float16,fp8,0,0.016908800601959227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,2,128,1,float16,float16,0,0.10698239803314209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,2,128,1,float16,fp8,0,0.11920319795608521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,2,128,1,fp8,fp8,0,0.11735199689865113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,4,128,1,float16,float16,0,0.11322720050811767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,4,128,1,float16,fp8,0,0.11718560457229614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,4,128,1,fp8,fp8,0,0.11910239458084107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,8,128,1,float16,float16,0,0.12075519561767578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,8,128,1,float16,fp8,0,0.1190943956375122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,8,128,1,fp8,fp8,0,0.11709599494934082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,24,128,1,float16,float16,0,0.08429120182991028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,4,128,1,fp8,fp8,0,0.22381119728088378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,24,128,1,fp8,fp8,0,0.08419520258903504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,1,128,1,float16,float16,0,0.05983359813690185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,1,128,1,float16,fp8,0,0.06401600241661072
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,8,128,1,float16,fp8,0,0.22401759624481202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,1,128,1,fp8,fp8,0,0.06392959952354431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,2,128,1,float16,float16,0,0.06007199883460999
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,2,128,1,float16,fp8,0,0.06402559876441956
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,4,128,1,float16,float16,0,0.0624239981174469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,4,128,1,float16,fp8,0,0.06435520052909852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,4,128,1,fp8,fp8,0,0.06434239745140076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,8,128,1,float16,float16,0,0.06818879842758178
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,8,128,1,float16,fp8,0,0.06586400270462037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,8,128,1,fp8,fp8,0,0.06583679914474487
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,24,128,1,float16,float16,0,0.04748480021953583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,24,128,1,float16,fp8,0,0.04744639992713928
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,24,128,1,fp8,fp8,0,0.047440001368522645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,1,128,1,float16,float16,0,0.03529280126094818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,1,128,1,fp8,fp8,0,0.03799839913845062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,2,128,1,float16,float16,0,0.035304000973701476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,2,128,1,float16,fp8,0,0.03900960087776184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,2,128,1,fp8,fp8,0,0.03850240111351013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,4,128,1,float16,float16,0,0.03620960116386414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,4,128,1,float16,fp8,0,0.03718560039997101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,4,128,1,fp8,fp8,0,0.03742400109767914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,8,128,1,float16,float16,0,0.039134401082992556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,8,128,1,float16,fp8,0,0.03808000087738037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,8,128,1,fp8,fp8,0,0.03804480135440826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,24,128,1,float16,float16,0,0.025352001190185547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,24,128,1,float16,fp8,0,0.02877599895000458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,2,128,1,fp8,fp8,0,0.06396160125732422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,24,128,1,fp8,fp8,0,0.027004799246788024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,1,128,1,float16,float16,0,0.021236799657344818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,1,128,1,float16,fp8,0,0.022878399491310118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,1,128,1,fp8,fp8,0,0.02282399982213974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,2,128,1,float16,float16,0,0.021096000075340272
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,2,128,1,float16,fp8,0,0.022860799729824067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,1,128,1,float16,fp8,0,0.037745600938796996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,2,128,1,fp8,fp8,0,0.022878399491310118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,4,128,1,float16,float16,0,0.022833600640296936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,4,128,1,float16,fp8,0,0.022806400060653688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,4,128,1,fp8,fp8,0,0.023447999358177186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,8,128,1,float16,float16,0,0.022780799865722658
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,8,128,1,float16,fp8,0,0.023531199991703035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,8,128,1,fp8,fp8,0,0.022940799593925476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,24,128,1,float16,float16,0,0.018838399648666383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,24,128,1,fp8,fp8,0,0.020678399503231047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,24,128,1,float16,fp8,0,0.020721599459648132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,1,128,1,float16,float16,0,0.016652800142765045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,1,128,1,float16,fp8,0,0.018532800674438476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,1,128,1,fp8,fp8,0,0.018492799997329713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,2,128,1,float16,fp8,0,0.018643200397491455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,2,128,1,fp8,fp8,0,0.018587200343608855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,24,128,1,float16,fp8,0,0.08240320086479187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,4,128,1,float16,float16,0,0.016715200245380403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,4,128,1,float16,fp8,0,0.01685280054807663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,4,128,1,fp8,fp8,0,0.01855839937925339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,8,128,1,float16,float16,0,0.01659359931945801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,8,128,1,float16,fp8,0,0.018459199368953703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,8,128,1,fp8,fp8,0,0.016631999611854555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,24,128,1,float16,float16,0,0.01348000019788742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,24,128,1,float16,fp8,0,0.014550399780273438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,24,128,1,fp8,fp8,0,0.014483200013637542
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,1,128,1,float16,float16,0,0.012462399899959564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,1,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,1,128,1,fp8,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,2,128,1,float16,fp8,0,0.012598399817943574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,2,128,1,float16,float16,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,2,128,1,fp8,fp8,0,0.01252480000257492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,4,128,1,float16,fp8,0,0.012457600235939026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,4,128,1,fp8,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,8,128,1,float16,float16,0,0.012486399710178375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,8,128,1,float16,fp8,0,0.012411200255155564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,8,128,1,fp8,fp8,0,0.01244639977812767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,24,128,1,float16,float16,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,24,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,24,128,1,fp8,fp8,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,1,128,1,float16,float16,0,0.012467200309038163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,1,128,1,float16,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,1,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,2,128,1,float16,float16,0,0.012513600289821625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,2,128,1,float16,fp8,0,0.012444800138473511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,2,128,1,float16,float16,0,0.016648000478744505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,2,128,1,fp8,fp8,0,0.012443199753761292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,4,128,1,float16,float16,0,0.012505599856376648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,4,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,4,128,1,fp8,fp8,0,0.012548799812793731
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,8,128,1,float16,float16,0,0.012518399953842163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,8,128,1,float16,fp8,0,0.012564800679683685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,8,128,1,fp8,fp8,0,0.012468799948692322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,24,128,1,float16,float16,0,0.012534399330615998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,24,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,1,128,1,float16,float16,0,0.012507200241088867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,1,128,1,float16,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,24,128,1,float16,fp8,0,0.012545600533485413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,2,128,1,float16,float16,0,0.012427199631929398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,2,128,1,float16,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,2,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,4,128,1,float16,float16,0,0.012468799948692322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,4,128,1,float16,fp8,0,0.012441600114107132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,4,128,1,fp8,fp8,0,0.012459199875593185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,8,128,1,float16,float16,0,0.012436799705028534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,4,128,1,float16,float16,0,0.012628799676895142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,8,128,1,fp8,fp8,0,0.01249919980764389
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,1,128,1,float16,float16,0,0.15877920389175415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,1,128,1,float16,fp8,0,0.1663151979446411
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,1,128,1,fp8,fp8,0,0.16815840005874633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,2,128,1,float16,float16,0,0.15644320249557495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,2,128,1,float16,fp8,0,0.1683359980583191
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,2,128,1,fp8,fp8,0,0.1662176012992859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,4,128,1,float16,float16,0,0.16233919858932494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,4,128,1,float16,fp8,0,0.16605279445648194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,4,128,1,fp8,fp8,0,0.16776479482650758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,8,128,1,float16,float16,0,0.17040159702301025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,8,128,1,float16,fp8,0,0.16640959978103637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,8,128,1,fp8,fp8,0,0.16429920196533204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,24,128,1,float16,float16,0,0.10722559690475464
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,24,128,1,float16,fp8,0,0.10649919509887695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,24,128,1,fp8,fp8,0,0.1069375991821289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,1,128,1,float16,float16,0,0.08444479703903199
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,1,128,1,float16,fp8,0,0.08830400109291077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,1,128,1,fp8,fp8,0,0.08805599808692932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,2,128,1,float16,float16,0,0.08525760173797607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,2,128,1,float16,fp8,0,0.08833919763565064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,1,128,1,fp8,fp8,0,0.012361600250005721
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,2,128,1,fp8,fp8,0,0.08834400177001953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,4,128,1,float16,float16,0,0.08674240112304688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,4,128,1,fp8,fp8,0,0.08829280138015747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,8,128,1,float16,float16,0,0.0909488022327423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,8,128,1,float16,fp8,0,0.012383999675512314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,4,128,1,float16,fp8,0,0.08891199827194214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,24,128,1,float16,float16,0,0.05960959792137146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,24,128,1,float16,fp8,0,0.05824000239372253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,24,128,1,fp8,fp8,0,0.057999998331069946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,1,128,1,float16,float16,0,0.04739840030670166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,1,128,1,float16,fp8,0,0.049511998891830444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,1,128,1,fp8,fp8,0,0.04962559938430786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,2,128,1,float16,float16,0,0.04749279916286468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,2,128,1,float16,fp8,0,0.04947839975357056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,2,128,1,fp8,fp8,0,0.049414399266242984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,4,128,1,float16,float16,0,0.047839999198913574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,4,128,1,float16,fp8,0,0.049465599656105044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,4,128,1,fp8,fp8,0,0.049481600522994995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,8,128,1,float16,float16,0,0.05031359791755676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,8,128,1,float16,fp8,0,0.04920479953289032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,24,128,1,float16,float16,0,0.033062401413917544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,24,128,1,float16,fp8,0,0.0345007985830307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,24,128,1,fp8,fp8,0,0.034251201152801516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,8,128,1,float16,fp8,0,0.08884800076484681
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,1,128,1,float16,float16,0,0.02892639935016632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,1,128,1,float16,fp8,0,0.030191999673843384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,1,128,1,fp8,fp8,0,0.02903839945793152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,2,128,1,float16,float16,0,0.028940799832344054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,2,128,1,float16,fp8,0,0.028993600606918336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,2,128,1,fp8,fp8,0,0.03051519989967346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,4,128,1,float16,float16,0,0.029044800996780397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,4,128,1,float16,fp8,0,0.030663999915122985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,4,128,1,fp8,fp8,0,0.029020801186561584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,8,128,1,float16,fp8,0,0.028998398780822755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,8,128,1,fp8,fp8,0,0.02934719920158386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,24,128,1,float16,float16,0,0.020761600136756896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,24,128,1,float16,fp8,0,0.020880000293254854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,24,128,1,fp8,fp8,0,0.02081120014190674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,1,128,1,float16,float16,0,0.018699200451374055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,1,128,1,float16,fp8,0,0.018824000656604768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,1,128,1,fp8,fp8,0,0.018751999735832213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,8,128,1,fp8,fp8,0,0.0494735985994339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,2,128,1,float16,float16,0,0.018807999789714813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,2,128,1,float16,fp8,0,0.01897760033607483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,2,128,1,fp8,fp8,0,0.01892479956150055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,4,128,1,float16,float16,0,0.018822400271892546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,4,128,1,float16,fp8,0,0.0188400000333786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,8,128,1,float16,float16,0,0.01881919950246811
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,8,128,1,fp8,fp8,0,0.08840159773826599
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,8,128,1,float16,fp8,0,0.0187376007437706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,8,128,1,fp8,fp8,0,0.018910400569438934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,24,128,1,float16,fp8,0,0.01664479970932007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,24,128,1,fp8,fp8,0,0.016568000614643096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,1,128,1,float16,float16,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,8,128,1,float16,float16,0,0.030899199843406677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,1,128,1,fp8,fp8,0,0.014672000706195832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,2,128,1,float16,float16,0,0.014646400511264802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,2,128,1,float16,fp8,0,0.014796799421310425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,2,128,1,fp8,fp8,0,0.014635199308395385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,4,128,1,float16,float16,0,0.014575999975204468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,4,128,1,float16,fp8,0,0.014798399806022645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,4,128,1,fp8,fp8,0,0.014686399698257446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,8,128,1,float16,float16,0,0.0147024005651474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,8,128,1,float16,fp8,0,0.014727999269962311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,8,128,1,fp8,fp8,0,0.014817599952220917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,24,128,1,float16,float16,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,24,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,24,128,1,fp8,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,4,128,1,fp8,fp8,0,0.01873600035905838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,1,128,1,float16,float16,0,0.010742399841547012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,1,128,1,float16,fp8,0,0.012123200297355651
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,1,128,1,fp8,fp8,0,0.012342400103807449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,2,128,1,float16,float16,0,0.011222399771213531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,2,128,1,float16,fp8,0,0.012579199671745301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,2,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,24,128,1,float16,float16,0,0.014672000706195832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,4,128,1,float16,float16,0,0.011832000315189361
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,4,128,1,float16,fp8,0,0.012294399738311767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,4,128,1,fp8,fp8,0,0.01093600019812584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,8,128,1,float16,fp8,0,0.012361600250005721
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,8,128,1,fp8,fp8,0,0.010969600081443787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,24,128,1,float16,float16,0,0.012372799962759019
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,24,128,1,float16,fp8,0,0.010651200264692306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,24,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,1,128,1,float16,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,1,128,1,float16,fp8,0,0.010761599987745285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,2,128,1,float16,float16,0,0.010636799782514573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,2,128,1,float16,fp8,0,0.010654400289058685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,2,128,1,fp8,fp8,0,0.010625600069761276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,4,128,1,float16,float16,0,0.01067200005054474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,4,128,1,float16,fp8,0,0.010822399705648422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,4,128,1,fp8,fp8,0,0.010753600299358368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,8,128,1,float16,float16,0,0.010784000158309937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,8,128,1,float16,fp8,0,0.011073599755764007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,8,128,1,fp8,fp8,0,0.010993599891662598
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,24,128,1,float16,float16,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,24,128,1,float16,fp8,0,0.010860799998044967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,24,128,1,fp8,fp8,0,0.011054400354623795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,1,128,1,float16,float16,0,0.010627199709415436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,1,128,1,float16,fp8,0,0.010590399801731109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,1,128,1,fp8,fp8,0,0.010611200332641601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,2,128,1,float16,float16,0,0.01061599999666214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,2,128,1,float16,fp8,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,2,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,8,128,1,float16,float16,0,0.012513600289821625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,4,128,1,float16,float16,0,0.01064639985561371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,4,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,4,128,1,fp8,fp8,0,0.011054400354623795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,8,128,1,float16,float16,0,0.01061440035700798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,1,128,1,fp8,fp8,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,8,128,1,float16,fp8,0,0.010841599851846694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,8,128,1,fp8,fp8,0,0.010660800337791442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,1,128,1,float16,float16,0,0.13762400150299073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,1,128,1,float16,fp8,0,0.13767679929733276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,1,128,1,fp8,fp8,0,0.13872480392456055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,2,128,1,float16,fp8,0,0.13885760307312012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,2,128,1,fp8,fp8,0,0.13948479890823365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,4,128,1,float16,float16,0,0.13920799493789673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,4,128,1,float16,fp8,0,0.13801920413970947
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,4,128,1,fp8,fp8,0,0.13873120546340942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,8,128,1,float16,float16,0,0.14363679885864258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,8,128,1,float16,fp8,0,0.13927199840545654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,1,128,1,float16,float16,0,0.011007999628782272
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,8,128,1,fp8,fp8,0,0.13977760076522827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,24,128,1,float16,float16,0,0.08267040252685547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,24,128,1,float16,fp8,0,0.0831391990184784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,24,128,1,fp8,fp8,0,0.08239039778709412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,1,128,1,float16,float16,0,0.0722383975982666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,2,128,1,float16,float16,0,0.13551199436187744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,1,128,1,float16,fp8,0,0.07300320267677307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,1,128,1,fp8,fp8,0,0.07425280213356018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,2,128,1,float16,float16,0,0.07221919894218445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,2,128,1,float16,fp8,0,0.07378079891204833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,4,128,1,float16,float16,0,0.07228320240974426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,2,128,1,fp8,fp8,0,0.07407039999961854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,4,128,1,float16,fp8,0,0.07353119850158692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,4,128,1,fp8,fp8,0,0.07295680046081543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,8,128,1,float16,float16,0,0.07654240131378173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,8,128,1,float16,fp8,0,0.07363520264625549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,8,128,1,fp8,fp8,0,0.07310559749603271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,24,128,1,float16,float16,0,0.04554400146007538
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,24,128,1,float16,fp8,0,0.045500800013542175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,24,128,1,fp8,fp8,0,0.04535520076751709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,1,128,1,float16,float16,0,0.04131360054016113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,1,128,1,float16,fp8,0,0.0415583997964859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,1,128,1,fp8,fp8,0,0.04259200096130371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,2,128,1,float16,float16,0,0.041345599293708804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,2,128,1,float16,fp8,0,0.04158560037612915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,2,128,1,fp8,fp8,0,0.04132800102233887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,4,128,1,float16,fp8,0,0.041264000535011294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,8,128,1,float16,float16,0,0.041577601432800294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,4,128,1,fp8,fp8,0,0.041438400745391846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,8,128,1,float16,fp8,0,0.04152320027351379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,24,128,1,float16,float16,0,0.027039998769760133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,24,128,1,float16,fp8,0,0.02887200117111206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,24,128,1,fp8,fp8,0,0.02887200117111206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,1,128,1,float16,float16,0,0.024876800179481507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,1,128,1,float16,fp8,0,0.02678079903125763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,1,128,1,fp8,fp8,0,0.026815998554229736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,2,128,1,float16,float16,0,0.026347199082374574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,2,128,1,float16,fp8,0,0.026899200677871705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,2,128,1,fp8,fp8,0,0.026918399333953857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,4,128,1,float16,float16,0,0.026099199056625368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,4,128,1,float16,fp8,0,0.026894399523735048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,4,128,1,fp8,fp8,0,0.026814401149749756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,8,128,1,float16,float16,0,0.02691679894924164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,8,128,1,float16,fp8,0,0.02627359926700592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,8,128,1,fp8,fp8,0,0.027003198862075806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,24,128,1,float16,fp8,0,0.018774400651454925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,24,128,1,fp8,fp8,0,0.01875839978456497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,1,128,1,float16,float16,0,0.016680000722408293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,1,128,1,float16,fp8,0,0.01685120016336441
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,1,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,2,128,1,float16,float16,0,0.01671839952468872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,2,128,1,float16,fp8,0,0.016774399578571318
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,2,128,1,fp8,fp8,0,0.016739200055599212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,4,128,1,float16,float16,0,0.04142400026321411
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,4,128,1,float16,float16,0,0.016809600591659545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,4,128,1,float16,fp8,0,0.017155200242996216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,4,128,1,fp8,fp8,0,0.01675039976835251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,8,128,1,float16,float16,0,0.018508799374103546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,8,128,1,fp8,fp8,0,0.04115679860115051
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,8,128,1,fp8,fp8,0,0.01669120043516159
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,24,128,1,float16,fp8,0,0.014555199444293976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,24,128,1,fp8,fp8,0,0.014707200229167938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,1,128,1,float16,float16,0,0.014478400349617004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,1,128,1,float16,fp8,0,0.014395199716091156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,1,128,1,fp8,fp8,0,0.014414399862289429
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,2,128,1,float16,float16,0,0.014470399916172027
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,2,128,1,float16,fp8,0,0.01459839940071106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,2,128,1,fp8,fp8,0,0.014535999298095703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,4,128,1,float16,float16,0,0.01441120058298111
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,24,128,1,float16,float16,0,0.018569600582122803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,4,128,1,float16,fp8,0,0.014443199336528777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,4,128,1,fp8,fp8,0,0.014608000218868256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,8,128,1,float16,float16,0,0.014435200393199921
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,8,128,1,float16,fp8,0,0.014574399590492249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,24,128,1,float16,float16,0,0.01077599972486496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,24,128,1,float16,fp8,0,0.010552000254392624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,24,128,1,fp8,fp8,0,0.01066880002617836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,1,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,1,128,1,float16,fp8,0,0.010660800337791442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,1,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,2,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,2,128,1,float16,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,8,128,1,float16,fp8,0,0.017075200378894807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,24,128,1,float16,float16,0,0.014601600170135499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,2,128,1,fp8,fp8,0,0.010887999832630158
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,4,128,1,float16,float16,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,4,128,1,fp8,fp8,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,8,128,1,float16,float16,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,8,128,1,float16,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,8,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,24,128,1,float16,float16,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,24,128,1,float16,fp8,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,8,128,1,fp8,fp8,0,0.01446560025215149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,24,128,1,fp8,fp8,0,0.01072160005569458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,1,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,1,128,1,float16,fp8,0,0.010648000240325927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,1,128,1,fp8,fp8,0,0.010649599879980088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,2,128,1,float16,float16,0,0.010654400289058685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,2,128,1,float16,fp8,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,2,128,1,fp8,fp8,0,0.010678400099277497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,4,128,1,float16,fp8,0,0.01067200005054474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,4,128,1,fp8,fp8,0,0.010654400289058685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,8,128,1,float16,float16,0,0.01069599986076355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,8,128,1,float16,fp8,0,0.010711999982595444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,8,128,1,fp8,fp8,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,4,128,1,float16,fp8,0,0.010999999940395355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,24,128,1,float16,float16,0,0.01063840016722679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,24,128,1,float16,fp8,0,0.010763200372457505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,1,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,1,128,1,float16,fp8,0,0.010659199953079224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,1,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,2,128,1,float16,float16,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,2,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,2,128,1,fp8,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,4,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,4,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,4,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,8,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,8,128,1,float16,fp8,0,0.010604800283908844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,8,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,4,128,1,float16,float16,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,1,128,1,float16,float16,0,0.12933119535446166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,1,128,1,float16,fp8,0,0.12539999485015868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,1,128,1,fp8,fp8,0,0.12349920272827149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,2,128,1,float16,float16,0,0.1278720021247864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,2,128,1,fp8,fp8,0,0.12437759637832642
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,2,128,1,float16,fp8,0,0.12522720098495482
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,4,128,1,float16,float16,0,0.12815200090408324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,4,128,1,float16,fp8,0,0.12359999418258667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,4,128,1,fp8,fp8,0,0.12534400224685668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,8,128,1,float16,float16,0,0.1328287959098816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,8,128,1,float16,fp8,0,0.12404160499572754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,8,128,1,fp8,fp8,0,0.12445600032806396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,24,128,1,float16,float16,0,0.07415199875831605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,24,128,1,fp8,fp8,0,0.07195839881896973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,1,128,1,float16,float16,0,0.0686240017414093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,1,128,1,float16,fp8,0,0.06601439714431763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,2,128,1,float16,float16,0,0.07010239958763123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,2,128,1,float16,fp8,0,0.06590719819068909
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,2,128,1,fp8,fp8,0,0.06600639820098878
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,4,128,1,float16,float16,0,0.07006719708442688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,4,128,1,float16,fp8,0,0.0659712016582489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,4,128,1,fp8,fp8,0,0.06727839708328247
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,8,128,1,float16,float16,0,0.07162079811096192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,8,128,1,float16,fp8,0,0.06784960031509399
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,8,128,1,fp8,fp8,0,0.06781759858131409
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,24,128,1,float16,float16,0,0.04133920073509216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,24,128,1,float16,fp8,0,0.041223999857902524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,24,128,1,fp8,fp8,0,0.04111360013484955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,1,128,1,float16,float16,0,0.03932960033416748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,1,128,1,float16,fp8,0,0.039124798774719236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,1,128,1,fp8,fp8,0,0.039182400703430174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,24,128,1,float16,fp8,0,0.07197759747505188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,2,128,1,float16,float16,0,0.03949759900569916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,24,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,2,128,1,float16,fp8,0,0.039155200123786926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,2,128,1,fp8,fp8,0,0.03913759887218475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,4,128,1,float16,float16,0,0.03936159908771515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,4,128,1,float16,fp8,0,0.039129599928855896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,4,128,1,fp8,fp8,0,0.03911199867725372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,1,128,1,fp8,fp8,0,0.06590080261230469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,8,128,1,float16,float16,0,0.04001759886741638
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,8,128,1,fp8,fp8,0,0.039057600498199466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,24,128,1,float16,float16,0,0.025118398666381835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,24,128,1,fp8,fp8,0,0.02502560019493103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,24,128,1,float16,fp8,0,0.02494879961013794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,1,128,1,float16,float16,0,0.025003200769424437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,1,128,1,float16,fp8,0,0.024827200174331664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,1,128,1,fp8,fp8,0,0.0253711998462677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,2,128,1,float16,float16,0,0.024883200228214265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,2,128,1,fp8,fp8,0,0.024881599843502043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,4,128,1,float16,float16,0,0.024942399561405183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,4,128,1,float16,fp8,0,0.024966399371623992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,4,128,1,fp8,fp8,0,0.024872000515460967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,8,128,1,float16,float16,0,0.02494879961013794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,8,128,1,float16,fp8,0,0.02481600046157837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,8,128,1,fp8,fp8,0,0.02491839975118637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,24,128,1,float16,float16,0,0.01677599996328354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,24,128,1,float16,fp8,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,24,128,1,fp8,fp8,0,0.016711999475955964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,1,128,1,float16,float16,0,0.01672160029411316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,1,128,1,float16,fp8,0,0.016628800332546233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,1,128,1,fp8,fp8,0,0.01675039976835251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,2,128,1,float16,float16,0,0.016740800440311433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,2,128,1,float16,fp8,0,0.016760000586509706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,2,128,1,fp8,fp8,0,0.0166703999042511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,4,128,1,float16,float16,0,0.016729600727558136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,4,128,1,float16,fp8,0,0.016763199865818024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,4,128,1,fp8,fp8,0,0.016774399578571318
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,8,128,1,float16,float16,0,0.01676799952983856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,8,128,1,float16,fp8,0,0.01677280068397522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,2,128,1,float16,fp8,0,0.024673600494861603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,8,128,1,fp8,fp8,0,0.016649599373340606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,24,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,24,128,1,fp8,fp8,0,0.013887999951839447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,1,128,1,float16,float16,0,0.01286720037460327
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,1,128,1,float16,fp8,0,0.012708799540996551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,1,128,1,fp8,fp8,0,0.01268800050020218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,2,128,1,float16,float16,0,0.012673600018024445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,2,128,1,float16,fp8,0,0.012881599366664886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,2,128,1,fp8,fp8,0,0.01435520052909851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,4,128,1,float16,float16,0,0.014646400511264802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,4,128,1,float16,fp8,0,0.014180800318717957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,4,128,1,fp8,fp8,0,0.01435839980840683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,8,128,1,float16,float16,0,0.014481599628925323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,8,128,1,float16,fp8,0,0.014209599792957306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,8,128,1,fp8,fp8,0,0.014025600254535675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,8,128,1,float16,fp8,0,0.03922399878501892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,24,128,1,float16,fp8,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,1,128,1,float16,float16,0,0.010686399787664414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,24,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,1,128,1,float16,fp8,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,1,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,24,128,1,float16,fp8,0,0.012641599774360657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,2,128,1,float16,fp8,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,2,128,1,fp8,fp8,0,0.010583999752998351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,4,128,1,float16,float16,0,0.010692799836397171
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,4,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,4,128,1,fp8,fp8,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,8,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,8,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,8,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,24,128,1,float16,float16,0,0.010632000118494033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,24,128,1,float16,fp8,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,24,128,1,fp8,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,1,128,1,float16,float16,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,1,128,1,float16,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,1,128,1,fp8,fp8,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,2,128,1,float16,float16,0,0.011025600135326385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,2,128,1,float16,fp8,0,0.010707200318574906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,2,128,1,fp8,fp8,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,4,128,1,float16,float16,0,0.010683199763298035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,4,128,1,float16,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,4,128,1,fp8,fp8,0,0.010662399977445603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,8,128,1,float16,float16,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,8,128,1,float16,fp8,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,8,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,24,128,1,float16,float16,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,24,128,1,float16,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,24,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,1,128,1,float16,float16,0,0.010552000254392624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,1,128,1,float16,fp8,0,0.010606399923563003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,1,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,24,128,1,float16,float16,0,0.012451200187206269
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,2,128,1,float16,float16,0,0.01061599999666214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,2,128,1,float16,fp8,0,0.010649599879980088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,4,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,4,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,4,128,1,fp8,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,8,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,8,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,8,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,1,128,1,float16,float16,0,0.12560319900512695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,1,128,1,float16,fp8,0,0.11717599630355835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,1,128,1,fp8,fp8,0,0.11724799871444702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,2,128,1,float16,float16,0,0.1263759970664978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,2,128,1,float16,fp8,0,0.11735199689865113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,2,128,1,fp8,fp8,0,0.11741119623184204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,4,128,1,float16,float16,0,0.12572480440139772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,4,128,1,float16,fp8,0,0.11735999584197998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,4,128,1,fp8,fp8,0,0.1174496054649353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,8,128,1,float16,float16,0,0.1256432056427002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,8,128,1,float16,fp8,0,0.11829119920730591
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,8,128,1,fp8,fp8,0,0.11714400053024292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,0,0.06815680265426635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,0,0.06394720077514648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,24,128,1,fp8,fp8,0,0.0637503981590271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,1,128,1,float16,float16,0,0.06808800101280213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,1,128,1,float16,fp8,0,0.06379039883613587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,1,128,1,fp8,fp8,0,0.06372640132904053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,2,128,1,float16,float16,0,0.0681984007358551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,2,128,1,float16,fp8,0,0.0639360010623932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,2,128,1,fp8,fp8,0,0.06377919912338256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,4,128,1,float16,float16,0,0.06796000003814698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,4,128,1,float16,fp8,0,0.06383039951324462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,4,128,1,fp8,fp8,0,0.0638159990310669
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,8,128,1,float16,float16,0,0.06812639832496643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,8,128,1,float16,fp8,0,0.06379680037498474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,8,128,1,fp8,fp8,0,0.06379200220108032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,0,0.03935840129852295
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,0,0.037145599722862244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,24,128,1,fp8,fp8,0,0.03715839982032776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,1,128,1,float16,float16,0,0.0392304003238678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,1,128,1,float16,fp8,0,0.037190398573875426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,1,128,1,fp8,fp8,0,0.03718400001525879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,2,128,1,float16,float16,0,0.03926079869270325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,2,128,1,float16,fp8,0,0.03722879886627197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,2,128,1,fp8,fp8,0,0.03747999966144562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,4,128,1,float16,float16,0,0.03928000032901764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,4,128,1,float16,fp8,0,0.03733600080013275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,4,128,1,fp8,fp8,0,0.037273600697517395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,2,128,1,fp8,fp8,0,0.010345599800348281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,8,128,1,float16,fp8,0,0.03741439878940582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,2,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,8,128,1,fp8,fp8,0,0.037099200487136844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,0,0.02499680072069168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,24,128,1,fp8,fp8,0,0.02300799936056137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,1,128,1,float16,float16,0,0.02489120066165924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,1,128,1,float16,fp8,0,0.023401600122451783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,1,128,1,fp8,fp8,0,0.02353599965572357
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,2,128,1,float16,float16,0,0.02493920028209686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,2,128,1,float16,fp8,0,0.024527999758720397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,2,128,1,fp8,fp8,0,0.022908799350261688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,4,128,1,float16,float16,0,0.02481279969215393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,4,128,1,float16,fp8,0,0.022916799783706664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,4,128,1,fp8,fp8,0,0.02295520007610321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,8,128,1,float16,float16,0,0.02487040013074875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,8,128,1,float16,fp8,0,0.02396160066127777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,8,128,1,fp8,fp8,0,0.02486560046672821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,0,0.018572799861431122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,0,0.016657599806785585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,1,128,1,float16,float16,0,0.016787199676036833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,24,128,1,fp8,fp8,0,0.01709599941968918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,1,128,1,float16,fp8,0,0.016524800658226015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,8,128,1,float16,float16,0,0.0393312007188797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,2,128,1,float16,float16,0,0.01687840074300766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,1,128,1,fp8,fp8,0,0.015476800501346588
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,4,128,1,float16,float16,0,0.01716320067644119
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,0,0.02304159998893738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,4,128,1,float16,fp8,0,0.01700959950685501
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,4,128,1,fp8,fp8,0,0.016539199650287627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,8,128,1,float16,float16,0,0.017150400578975676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,8,128,1,float16,fp8,0,0.01661120057106018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,8,128,1,fp8,fp8,0,0.017047999799251555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,0,0.013014400005340576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,24,128,1,fp8,fp8,0,0.012948800623416901
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,1,128,1,float16,float16,0,0.014788800477981567
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,1,128,1,float16,fp8,0,0.012905600666999816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,1,128,1,fp8,fp8,0,0.012956799566745758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,2,128,1,float16,float16,0,0.014142400026321411
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,2,128,1,float16,fp8,0,0.012910400331020356
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,2,128,1,fp8,fp8,0,0.012825599312782288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,4,128,1,float16,float16,0,0.012838399410247803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,2,128,1,float16,fp8,0,0.016711999475955964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,4,128,1,float16,fp8,0,0.012758399546146392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,4,128,1,fp8,fp8,0,0.012588800489902496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,8,128,1,float16,float16,0,0.012807999551296235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,8,128,1,float16,fp8,0,0.012606400251388549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,8,128,1,fp8,fp8,0,0.012825599312782288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,0,0.010726399719715118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,24,128,1,fp8,fp8,0,0.01071999967098236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,0,0.01465120017528534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,1,128,1,float16,float16,0,0.010705599933862687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,1,128,1,float16,fp8,0,0.010662399977445603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,2,128,1,float16,float16,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,1,128,1,fp8,fp8,0,0.010702399909496308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,2,128,1,float16,fp8,0,0.010622400045394897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,2,128,1,fp8,fp8,0,0.010657600313425063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,4,128,1,float16,float16,0,0.010595200210809707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,4,128,1,fp8,fp8,0,0.010603199899196624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,8,128,1,float16,float16,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,8,128,1,float16,fp8,0,0.01063840016722679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,8,128,1,fp8,fp8,0,0.010590399801731109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,24,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,1,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,1,128,1,fp8,fp8,0,0.010307200253009796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,2,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,2,128,1,fp8,fp8,0,0.01669120043516159
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,2,128,1,float16,fp8,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,2,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,4,128,1,float16,float16,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,4,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,4,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,8,128,1,float16,float16,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,8,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,8,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,0,0.010606399923563003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,24,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,1,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,1,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,2,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,2,128,1,float16,fp8,0,0.010633599758148194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,2,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,4,128,1,float16,float16,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,4,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,4,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,8,128,1,float16,float16,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,8,128,1,float16,fp8,0,0.01067039966583252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,4,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,8,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,0,0.012595200538635254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,fp8,0,5.540708923339844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,1,128,1,fp8,fp8,0,5.719705581665039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,float16,0,8.792469024658203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,fp8,0,5.458321762084961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,2,128,1,fp8,fp8,0,5.80846061706543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,float16,0,9.46411361694336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,fp8,0,5.653324890136719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,4,128,1,fp8,fp8,0,5.912803268432617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,float16,0,8.937417602539062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,fp8,0,5.529694366455078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,float16,0,10.719099426269532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,8,128,1,fp8,fp8,0,6.111217498779297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,fp8,0,2.77860164642334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,float16,0,5.105847930908203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,16,128,1,fp8,fp8,0,2.7768720626831054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,fp8,0,2.737945556640625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,1,128,1,fp8,fp8,0,3.0613712310791015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,fp8,0,2.7222944259643556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,2,128,1,fp8,fp8,0,2.8961952209472654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,float16,0,3.329324722290039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,float16,0,3.6712673187255858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,fp8,0,2.868769645690918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,4,128,1,fp8,fp8,0,3.1121904373168947
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,fp8,0,2.8671119689941404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,8,128,1,fp8,fp8,0,2.762790489196777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,float16,0,3.639236831665039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,fp8,0,1.4826160430908204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,16,128,1,fp8,fp8,0,1.4507247924804687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,float16,0,1.8683183670043946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,fp8,0,1.4224464416503906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,1,128,1,fp8,fp8,0,1.5654704093933105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,float16,0,1.6291807174682618
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,fp8,0,1.413691234588623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,2,128,1,fp8,fp8,0,1.6346975326538087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,float16,0,1.676353645324707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,fp8,0,1.4477824211120605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,4,128,1,fp8,fp8,0,1.5479519844055176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,float16,0,1.6630704879760743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,fp8,0,1.4243583679199219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,8,128,1,fp8,fp8,0,1.6979904174804688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,fp8,0,0.8378815650939941
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,16,128,1,fp8,fp8,0,0.7962607860565185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,float16,0,0.8815520286560059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,fp8,0,0.7823760032653808
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,1,128,1,fp8,fp8,0,0.7760704040527344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,float16,0,0.8840144157409668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,fp8,0,0.7921696186065674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,2,128,1,fp8,fp8,0,0.7841792106628418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,float16,0,0.8957679748535157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,fp8,0,0.7775936126708984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,4,128,1,fp8,fp8,0,0.7764592170715332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,float16,0,0.9050175666809082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,fp8,0,0.7828927993774414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,8,128,1,fp8,fp8,0,0.7785488128662109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,float16,0,4.885054397583008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,fp8,0,3.1517072677612306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,1,128,1,fp8,fp8,0,3.331243133544922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,float16,0,4.759270477294922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,fp8,0,3.4092830657958983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,2,128,1,fp8,fp8,0,3.108705520629883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,float16,0,5.314459228515625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,fp8,0,3.5119728088378905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,4,128,1,fp8,fp8,0,3.3676174163818358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,float16,0,4.891835021972656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,fp8,0,3.5260543823242188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,8,128,1,fp8,fp8,0,3.304987335205078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,float16,0,0.9096976280212402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,float16,0,1.6376655578613282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,float16,0,3.927227020263672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,fp8,0,1.6420415878295898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,float16,0,1.9070880889892579
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,16,128,1,fp8,fp8,0,1.6873632431030274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,float16,0,1.999830436706543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,fp8,0,1.6293888092041016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,1,128,1,fp8,fp8,0,1.693899154663086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,float16,0,1.7645231246948243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,fp8,0,1.6911808013916017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,2,128,1,fp8,fp8,0,1.5911855697631836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,float16,0,1.8357135772705078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,4,128,1,fp8,fp8,0,1.6031967163085938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,fp8,0,2.3260656356811524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,float16,0,1.8876480102539062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,float16,0,1.0139552116394044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,8,128,1,fp8,fp8,0,1.6051376342773438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,fp8,0,1.187649631500244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,16,128,1,fp8,fp8,0,0.8883312225341797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,fp8,0,0.8525600433349609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,float16,0,0.9390720367431641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,1,128,1,fp8,fp8,0,1.0047072410583495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,fp8,0,0.8806287765502929
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,float16,0,0.961081600189209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,2,128,1,fp8,fp8,0,0.8427280426025391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,fp8,0,0.8599455833435059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,float16,0,0.9530480384826661
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,4,128,1,fp8,fp8,0,0.8447695732116699
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,fp8,0,0.893131160736084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,float16,0,0.9825087547302246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,8,128,1,fp8,fp8,0,0.8420864105224609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,float16,0,0.7006400108337403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,fp8,0,0.5031407833099365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,16,128,1,fp8,fp8,0,0.6647615909576416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,fp8,0,0.4797376155853271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,fp8,0,1.6427152633666993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,1,128,1,fp8,fp8,0,0.48347997665405273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,float16,0,0.5434192180633545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,fp8,0,0.48139200210571287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,2,128,1,fp8,fp8,0,0.4829103946685791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,float16,0,0.5436031818389893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,fp8,0,0.48288640975952146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,4,128,1,fp8,fp8,0,0.48217120170593264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,float16,0,0.5306320190429688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,fp8,0,0.48170881271362304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,8,128,1,fp8,fp8,0,0.4755727767944336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,fp8,0,2.177734375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,float16,0,2.4398128509521486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,1,128,1,fp8,fp8,0,2.1817487716674804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,float16,0,0.5553343772888184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,2,128,1,fp8,fp8,0,2.186244773864746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,float16,0,2.835339164733887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,fp8,0,2.2367679595947267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,float16,0,2.7116575241088867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,fp8,0,2.1790719985961915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,4,128,1,fp8,fp8,0,2.3300527572631835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,float16,0,2.5149871826171877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,fp8,0,2.9466320037841798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,float16,0,1.3676159858703614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,fp8,0,1.6653247833251954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,8,128,1,fp8,fp8,0,2.252604866027832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,16,128,1,fp8,fp8,0,1.251638412475586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,float16,0,1.6427984237670898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,1,128,1,fp8,fp8,0,1.1505392074584961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,float16,0,1.313918399810791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,2,128,1,fp8,fp8,0,1.134000015258789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,fp8,0,1.466868782043457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,float16,0,1.3132495880126953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,fp8,0,1.1325599670410156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,4,128,1,fp8,fp8,0,1.1370223999023437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,fp8,0,1.2064271926879884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,float16,0,1.6525423049926757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,fp8,0,1.1387568473815919
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,8,128,1,fp8,fp8,0,1.1390751838684081
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,float16,0,0.739404821395874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,float16,0,0.6996431827545166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,16,128,1,fp8,fp8,0,0.6374479770660401
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,fp8,0,0.9730976104736329
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,fp8,0,0.6269152164459229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,1,128,1,fp8,fp8,0,0.6352672100067138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,float16,0,0.6856832027435302
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,fp8,0,0.9657152175903321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,float16,0,0.7080175876617432
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,fp8,0,0.6381392002105712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,4,128,1,fp8,fp8,0,0.6547264099121094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,float16,0,0.7194831848144532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,fp8,0,0.6186543941497803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,8,128,1,fp8,fp8,0,0.6263872146606445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,float16,0,0.4253087997436523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,fp8,0,0.36551361083984374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,16,128,1,fp8,fp8,0,0.3702752113342285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,float16,0,0.40555200576782224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,fp8,0,0.35252320766448975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,1,128,1,fp8,fp8,0,0.3568272113800049
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,float16,0,0.40674557685852053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,2,128,1,fp8,fp8,0,0.35831360816955565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,float16,0,0.4098351955413818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,2,128,1,fp8,fp8,0,0.6262127876281738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,fp8,0,0.3573760032653809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,4,128,1,fp8,fp8,0,0.35318880081176757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,float16,0,0.40893120765686036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,fp8,0,0.3605295896530151
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,8,128,1,fp8,fp8,0,0.35463199615478513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,fp8,0,0.35193920135498047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,1,128,1,fp8,fp8,0,2.8865968704223635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,fp8,0,3.0328624725341795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,float16,0,4.296308898925782
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,float16,0,3.8200111389160156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,fp8,0,3.1866559982299805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,2,128,1,fp8,fp8,0,3.2267921447753904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,float16,0,4.7497600555419925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,fp8,0,2.8975664138793946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,4,128,1,fp8,fp8,0,3.0426847457885744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,float16,0,4.625425720214844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,float16,0,2.2371408462524416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,8,128,1,fp8,fp8,0,2.886387252807617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,fp8,0,1.8051567077636719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,16,128,1,fp8,fp8,0,1.797719955444336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,float16,0,1.6035888671875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,fp8,0,1.5086607933044434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,1,128,1,fp8,fp8,0,1.491579246520996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,fp8,0,2.977779197692871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,float16,0,1.9989200592041017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,fp8,0,1.513383960723877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,2,128,1,fp8,fp8,0,1.5635855674743653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,float16,0,1.6474992752075195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,fp8,0,1.6965055465698242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,4,128,1,fp8,fp8,0,1.495574378967285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,float16,0,0.9646176338195801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,fp8,0,1.6243200302124023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,8,128,1,fp8,fp8,0,1.4718976020812988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,fp8,0,0.8804896354675293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,16,128,1,fp8,fp8,0,0.8121711730957031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,float16,0,0.8679887771606445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,fp8,0,0.7700287818908691
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,1,128,1,fp8,fp8,0,0.7905055999755859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,float16,0,0.8481200218200684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,float16,0,2.1420448303222654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,fp8,0,0.7864640235900879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,2,128,1,fp8,fp8,0,0.8882047653198242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,float16,0,0.8698399543762207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,fp8,0,0.7766223907470703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,4,128,1,fp8,fp8,0,0.7848351955413818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,float16,0,0.8855119705200195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,fp8,0,0.7773407936096192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,8,128,1,fp8,fp8,0,0.7685296058654785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,float16,0,0.5633391857147216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,fp8,0,0.447111988067627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,16,128,1,fp8,fp8,0,0.4416463851928711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,float16,0,0.5086480140686035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,fp8,0,0.4308784008026123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,1,128,1,fp8,fp8,0,0.42370238304138186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,float16,0,0.4888239860534668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,fp8,0,0.43030238151550293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,float16,0,0.48139681816101076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,fp8,0,0.4303919792175293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,float16,0,0.4910304069519043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,8,128,1,fp8,fp8,0,0.4478640079498291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,float16,0,0.2889296054840088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,fp8,0,0.25743999481201174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,16,128,1,fp8,fp8,0,0.25738399028778075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,float16,0,0.2700239896774292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,2,128,1,fp8,fp8,0,0.42559041976928713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,fp8,0,0.24825119972229004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,1,128,1,fp8,fp8,0,0.2485327959060669
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,float16,0,0.26997759342193606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,fp8,0,0.2484447956085205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,2,128,1,fp8,fp8,0,0.2468575954437256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,float16,0,0.2729023933410645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,fp8,0,0.2484816074371338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,4,128,1,fp8,fp8,0,0.2482448101043701
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,float16,0,0.2796416044235229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,fp8,0,0.24819679260253907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,8,128,1,fp8,fp8,0,0.24700961112976075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,fp8,0,1.7005392074584962
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,float16,0,2.121334457397461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,fp8,0,0.4327888011932373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,1,128,1,fp8,fp8,0,1.7503583908081055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,fp8,0,1.710380744934082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,float16,0,2.5662559509277343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,2,128,1,fp8,fp8,0,1.7914400100708008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,4,128,1,fp8,fp8,0,0.43281121253967286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,float16,0,1.8920576095581054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,fp8,0,1.9216447830200196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,4,128,1,fp8,fp8,0,1.8113040924072266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,float16,0,1.07194242477417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,8,128,1,fp8,fp8,0,1.7201759338378906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,fp8,0,2.067585563659668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,16,128,1,fp8,fp8,0,0.9572031974792481
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,fp8,0,1.1319408416748047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,float16,0,1.001688003540039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,fp8,0,0.8821760177612304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,1,128,1,fp8,fp8,0,0.9186880111694335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,float16,0,1.136995220184326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,fp8,0,0.8811216354370117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,2,128,1,fp8,fp8,0,0.8999664306640625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,float16,0,1.9707839965820313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,float16,0,1.3067440032958983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,fp8,0,0.8787103652954101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,4,128,1,fp8,fp8,0,0.8803903579711914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,float16,0,1.029147243499756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,float16,0,0.5692944049835205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,8,128,1,fp8,fp8,0,0.878105640411377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,fp8,0,1.2669376373291015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,fp8,0,0.507206392288208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,16,128,1,fp8,fp8,0,0.5294847965240479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,fp8,0,0.48820958137512205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,1,128,1,fp8,fp8,0,0.4917744159698486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,float16,0,0.5200384140014649
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,fp8,0,0.4883264064788818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,2,128,1,fp8,fp8,0,0.4833712100982666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,float16,0,0.5277455806732178
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,fp8,0,0.4822688102722168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,4,128,1,fp8,fp8,0,0.48144960403442383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,float16,0,0.5643184185028076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,fp8,0,0.48140320777893064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,float16,0,0.3345247983932495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,8,128,1,fp8,fp8,0,0.48200159072875975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,16,128,1,fp8,fp8,0,0.29922559261322024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,float16,0,0.2970880031585693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,fp8,0,0.2829663991928101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,1,128,1,fp8,fp8,0,0.26973121166229247
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,float16,0,0.29413280487060545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,float16,0,0.5163519859313965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,fp8,0,0.26899840831756594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,2,128,1,fp8,fp8,0,0.26721439361572263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,float16,0,0.2934015989303589
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,fp8,0,0.27008800506591796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,float16,0,0.3011120080947876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,fp8,0,0.27059359550476075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,8,128,1,fp8,fp8,0,0.2707887887954712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,float16,0,0.1890928030014038
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,fp8,0,0.1669935941696167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,fp8,0,0.2848416090011597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,16,128,1,fp8,fp8,0,0.16874719858169557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,float16,0,0.17514400482177733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,fp8,0,0.16154240369796752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,1,128,1,fp8,fp8,0,0.15940480232238768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,float16,0,0.17264959812164307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,fp8,0,0.16251039505004883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,2,128,1,fp8,fp8,0,0.1613360047340393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,fp8,0,0.15980639457702636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,4,128,1,fp8,fp8,0,0.26721439361572263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,4,128,1,fp8,fp8,0,0.16023039817810059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,float16,0,0.1760319948196411
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,fp8,0,0.15968799591064453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,8,128,1,fp8,fp8,0,0.1568063974380493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,float16,0,1.7521520614624024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,fp8,0,1.6343887329101563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,float16,0,0.17594079971313475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,1,128,1,fp8,fp8,0,1.6337055206298827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,fp8,0,1.6643743515014648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,float16,0,1.8294000625610352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,2,128,1,fp8,fp8,0,1.6508800506591796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,float16,0,1.7363807678222656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,fp8,0,1.683340835571289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,4,128,1,fp8,fp8,0,1.9667119979858398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,float16,0,1.9824911117553712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,float16,0,1.055564785003662
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,8,128,1,fp8,fp8,0,1.6384511947631837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,fp8,0,1.0692208290100098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,fp8,0,2.1601823806762694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,fp8,0,0.8569487571716309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,float16,0,0.9893631935119629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,16,128,1,fp8,fp8,0,1.0257264137268067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,1,128,1,fp8,fp8,0,0.8484175682067872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,float16,0,0.9065567970275878
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,fp8,0,0.9801728248596191
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,2,128,1,fp8,fp8,0,0.8538111686706543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,float16,0,0.9043888092041016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,fp8,0,0.8556207656860352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,4,128,1,fp8,fp8,0,0.8552607536315918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,float16,0,0.9942864418029785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,fp8,0,1.195902442932129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,float16,0,0.5402847766876221
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,8,128,1,fp8,fp8,0,0.8480768203735352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,16,128,1,fp8,fp8,0,0.4799056053161621
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,float16,0,0.47937440872192383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,fp8,0,0.45419678688049314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,1,128,1,fp8,fp8,0,0.45148639678955077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,float16,0,0.4796016216278076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,fp8,0,0.4522687911987305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,2,128,1,fp8,fp8,0,0.4486688137054443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,float16,0,0.5026336193084717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,fp8,0,0.450710391998291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,4,128,1,fp8,fp8,0,0.4464992046356201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,float16,0,0.5815792083740234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,fp8,0,0.44970240592956545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,8,128,1,fp8,fp8,0,0.4449312210083008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,fp8,0,0.26607840061187743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,16,128,1,fp8,fp8,0,0.26369280815124513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,float16,0,0.267249608039856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,fp8,0,0.24633920192718506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,1,128,1,fp8,fp8,0,0.2490607976913452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,fp8,0,0.5914495944976806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,float16,0,0.26210880279541016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,fp8,0,0.24836480617523193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,2,128,1,fp8,fp8,0,0.2493232011795044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,float16,0,0.26691839694976804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,fp8,0,0.24872798919677735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,4,128,1,fp8,fp8,0,0.24964640140533448
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,float16,0,0.27645599842071533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,fp8,0,0.24796640872955322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,float16,0,0.1667695999145508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,8,128,1,fp8,fp8,0,0.24991679191589355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,16,128,1,fp8,fp8,0,0.15204639434814454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,float16,0,0.1526975989341736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,fp8,0,0.1418336033821106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,1,128,1,fp8,fp8,0,0.1443951964378357
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,float16,0,0.1501952052116394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,fp8,0,0.1438976049423218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,2,128,1,fp8,fp8,0,0.1419376015663147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,float16,0,0.15547200441360473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,fp8,0,0.14178719520568847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,float16,0,0.3321295976638794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,4,128,1,fp8,fp8,0,0.14332799911499022
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,float16,0,0.16066240072250365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,fp8,0,0.14233920574188233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,8,128,1,fp8,fp8,0,0.14290560483932496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,float16,0,0.1025696039199829
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,fp8,0,0.0948639988899231
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,16,128,1,fp8,fp8,0,0.09615839719772339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,float16,0,0.09669119715690613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,fp8,0,0.09023200273513794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,1,128,1,fp8,fp8,0,0.09055839776992798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,float16,0,0.09602400064468383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,fp8,0,0.09021440148353577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,2,128,1,fp8,fp8,0,0.09048159718513489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,float16,0,0.09494879841804504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,fp8,0,0.09152960181236267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,4,128,1,fp8,fp8,0,0.09076480269432068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,float16,0,0.10035519599914551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,fp8,0,0.09058560132980346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,8,128,1,fp8,fp8,0,0.0904528021812439
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,fp8,0,0.15403679609298707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,fp8,0,1.0109999656677247
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,float16,0,1.0727104187011718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,1,128,1,fp8,fp8,0,1.0226207733154298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,float16,0,1.0343615531921386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,fp8,0,1.1838239669799804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,2,128,1,fp8,fp8,0,1.0211039543151856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,fp8,0,1.018336009979248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,float16,0,1.4907103538513184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,4,128,1,fp8,fp8,0,1.0191231727600099
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,float16,0,1.1587648391723633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,fp8,0,1.0407983779907226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,8,128,1,fp8,fp8,0,1.0174736022949218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,16,128,1,fp8,fp8,0,0.5965328216552734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,float16,0,0.6377488136291504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,fp8,0,0.5327360153198242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,float16,0,0.5454592227935791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,fp8,0,0.5441967964172363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,2,128,1,fp8,fp8,0,0.5285247802734375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,float16,0,0.5591279983520507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,float16,0,0.5598911762237548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,fp8,0,0.5256256103515625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,1,128,1,fp8,fp8,0,0.5295983791351319
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,float16,0,0.5887631893157959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,fp8,0,0.5220208168029785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,8,128,1,fp8,fp8,0,0.5274191856384277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,float16,0,0.33992800712585447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,fp8,0,0.31021919250488283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,16,128,1,fp8,fp8,0,0.3089247941970825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,float16,0,0.2951807975769043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,fp8,0,0.28785440921783445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,1,128,1,fp8,fp8,0,0.28713600635528563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,float16,0,0.29131040573120115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,fp8,0,0.2846992015838623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,fp8,0,0.663262414932251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,float16,0,0.30224640369415284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,fp8,0,0.28627839088439944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,4,128,1,fp8,fp8,0,0.5315392017364502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,float16,0,0.3218816041946411
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,fp8,0,0.28527679443359377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,8,128,1,fp8,fp8,0,0.2868031978607178
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,float16,0,0.1913807988166809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,fp8,0,0.17392159700393678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,16,128,1,fp8,fp8,0,0.17316800355911255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,float16,0,0.16943360567092897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,fp8,0,0.15760159492492676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,1,128,1,fp8,fp8,0,0.15955519676208496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,float16,0,0.1670672059059143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,fp8,0,0.158571195602417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,2,128,1,fp8,fp8,0,0.15861120223999023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,float16,0,0.17173919677734376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,fp8,0,0.1592079997062683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,4,128,1,fp8,fp8,0,0.15905760526657103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,float16,0,0.18044480085372924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,fp8,0,0.16003999710083008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,2,128,1,fp8,fp8,0,0.28497920036315916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,8,128,1,fp8,fp8,0,0.16034719944000245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,float16,0,0.11240960359573364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,16,128,1,fp8,fp8,0,0.10065599679946899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,fp8,0,0.0974831998348236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,1,128,1,fp8,fp8,0,0.09348800182342529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,float16,0,0.09716320037841797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,fp8,0,0.09290720224380493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,2,128,1,fp8,fp8,0,0.09292799830436707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,float16,0,0.09922879934310913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,fp8,0,0.09295200109481812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,4,128,1,fp8,fp8,0,0.09281600117683411
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,float16,0,0.10428160429000854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,fp8,0,0.09284800291061401
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,8,128,1,fp8,fp8,0,0.09394879937171936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,float16,0,0.06619200110435486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,fp8,0,0.06374719738960266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,16,128,1,fp8,fp8,0,0.06373440027236939
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,float16,0,0.06281440258026123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,fp8,0,0.09944319725036621
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,fp8,0,0.06023520231246948
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,1,128,1,fp8,fp8,0,0.0597760021686554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,float16,0,0.06267200112342834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,float16,0,0.09690399765968323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,fp8,0,0.05969439744949341
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,2,128,1,fp8,fp8,0,0.060819202661514284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,float16,0,0.06515359878540039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,4,128,1,fp8,fp8,0,0.2832112073898315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,fp8,0,0.059736001491546634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,4,128,1,fp8,fp8,0,0.06218879818916321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,fp8,0,0.059971201419830325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,8,128,1,fp8,fp8,0,0.059627199172973634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,fp8,0,1.0273632049560546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,1,128,1,fp8,fp8,0,1.0237008094787599
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,float16,0,1.0181360244750977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,fp8,0,1.021671962738037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,float16,0,1.0422863960266113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,float16,0,0.06567519903182983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,2,128,1,fp8,fp8,0,1.2032447814941407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,fp8,0,1.0779135704040528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,4,128,1,fp8,fp8,0,1.0283856391906738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,float16,0,1.2358575820922852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,fp8,0,1.0260576248168944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,8,128,1,fp8,fp8,0,1.0749183654785157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,float16,0,0.6701216220855712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,fp8,0,0.6109392166137695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,16,128,1,fp8,fp8,0,0.604804801940918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,float16,0,0.5321280002593994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,float16,0,1.036032009124756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,fp8,0,0.5394288063049316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,1,128,1,fp8,fp8,0,0.533076810836792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,float16,0,0.5400527954101563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,fp8,0,0.5402336120605469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,2,128,1,fp8,fp8,0,0.5311520099639893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,float16,0,0.5566751956939697
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,fp8,0,0.5382736206054688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,4,128,1,fp8,fp8,0,0.5294511795043946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,float16,0,0.5923600196838379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,fp8,0,0.5350768089294433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,8,128,1,fp8,fp8,0,0.5295135974884033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,float16,0,0.34495038986206056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,fp8,0,0.32151041030883787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,16,128,1,fp8,fp8,0,0.32032480239868166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,float16,0,0.2823904037475586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,fp8,0,0.2817840099334717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,1,128,1,fp8,fp8,0,0.2892927885055542
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,fp8,0,0.28216478824615476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,float16,0,0.28826398849487306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,2,128,1,fp8,fp8,0,0.28113598823547364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,float16,0,0.2971951961517334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,fp8,0,0.28085598945617674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,4,128,1,fp8,fp8,0,0.2856816053390503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,float16,0,0.3095328092575073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,8,128,1,fp8,fp8,0,0.28066239356994627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,float16,0,0.19083679914474488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,fp8,0,0.1734992027282715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,float16,0,0.16042239665985109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,fp8,0,0.15271040201187133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,1,128,1,fp8,fp8,0,0.15512959957122802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,float16,0,0.1541440010070801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,fp8,0,0.15549440383911134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,2,128,1,fp8,fp8,0,0.1521504044532776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,float16,0,0.1635424017906189
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,fp8,0,0.15243200063705445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,4,128,1,fp8,fp8,0,0.15556639432907104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,float16,0,0.1689136028289795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,8,128,1,fp8,fp8,0,0.15189599990844727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,float16,0,0.10452640056610107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,fp8,0,0.09676799774169922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,16,128,1,fp8,fp8,0,0.09669600129127502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,float16,0,0.08761119842529297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,fp8,0,0.0864512026309967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,fp8,0,0.28617119789123535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,1,128,1,fp8,fp8,0,0.08627039790153504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,float16,0,0.0884447991847992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,16,128,1,fp8,fp8,0,0.17134879827499389
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,fp8,0,0.08645439743995667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,2,128,1,fp8,fp8,0,0.08651360273361205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,float16,0,0.09267839789390564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,fp8,0,0.0863215982913971
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,float16,0,0.09814559817314147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,fp8,0,0.0863759994506836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,8,128,1,fp8,fp8,0,0.08678879737854003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,float16,0,0.06215839982032776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,fp8,0,0.05979679822921753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,16,128,1,fp8,fp8,0,0.059824001789093015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,float16,0,0.05607680082321167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,fp8,0,0.05545759797096252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,1,128,1,fp8,fp8,0,0.055486398935317996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,float16,0,0.056587201356887815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,fp8,0,0.055508798360824584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,2,128,1,fp8,fp8,0,0.05710560083389282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,fp8,0,0.15496480464935303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,fp8,0,0.055638402700424194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,4,128,1,fp8,fp8,0,0.05568959712982178
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,float16,0,0.059671998023986816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,fp8,0,0.05578240156173706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,8,128,1,fp8,fp8,0,0.05557760000228882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,float16,0,0.03919999897480011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,fp8,0,0.037089601159095764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,16,128,1,fp8,fp8,0,0.03916800022125244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,float16,0,0.03531840145587921
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,fp8,0,0.03656800091266632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,1,128,1,fp8,fp8,0,0.03501920104026794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,float16,0,0.03712320029735565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,fp8,0,0.035123199224472046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,2,128,1,fp8,fp8,0,0.03660320043563843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,float16,0,0.03699840009212494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,fp8,0,0.036759999394416806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,4,128,1,fp8,fp8,0,0.035180801153182985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,float16,0,0.038987201452255246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,fp8,0,0.035139200091362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,8,128,1,fp8,fp8,0,0.03562560081481934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,4,128,1,fp8,fp8,0,0.08647840023040772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,float16,0,0.6393727779388427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,fp8,0,0.6733967781066894
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,1,128,1,fp8,fp8,0,0.670692777633667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,float16,0,0.6308144092559814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,fp8,0,0.6691743850708007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,2,128,1,fp8,fp8,0,0.6683584213256836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,float16,0,0.6601632118225098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,fp8,0,0.6645872116088867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,4,128,1,fp8,fp8,0,0.6589920043945312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,float16,0,0.7325200080871582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,fp8,0,0.6569375991821289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,8,128,1,fp8,fp8,0,0.6621888160705567
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,float16,0,0.43361759185791016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,fp8,0,0.4014592170715332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,float16,0,0.05777279734611511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,16,128,1,fp8,fp8,0,0.3982687950134277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,float16,0,0.34546239376068116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,1,128,1,fp8,fp8,0,0.3473759889602661
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,float16,0,0.3406160116195679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,fp8,0,0.34633920192718504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,2,128,1,fp8,fp8,0,0.3504672050476074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,float16,0,0.34933760166168215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,fp8,0,0.34859039783477785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,4,128,1,fp8,fp8,0,0.3446079969406128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,float16,0,0.3818624019622803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,fp8,0,0.3421760082244873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,8,128,1,fp8,fp8,0,0.34606399536132815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,float16,0,0.22854399681091309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,16,128,1,fp8,fp8,0,0.21100161075592042
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,float16,0,0.18403840065002441
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,fp8,0,0.18480000495910645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,1,128,1,fp8,fp8,0,0.18758560419082643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,float16,0,0.1801632046699524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,fp8,0,0.34922239780426023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,fp8,0,0.18781759738922119
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,2,128,1,fp8,fp8,0,0.18486399650573732
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,float16,0,0.18840160369873046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,fp8,0,0.18661279678344728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,float16,0,0.20237278938293457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,fp8,0,0.18691200017929077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,8,128,1,fp8,fp8,0,0.18267199993133545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,float16,0,0.12624479532241822
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,fp8,0,0.1159600019454956
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,16,128,1,fp8,fp8,0,0.11717760562896729
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,fp8,0,0.10098079442977906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,1,128,1,fp8,fp8,0,0.10083199739456176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,float16,0,0.10293920040130615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,fp8,0,0.10085920095443726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,2,128,1,fp8,fp8,0,0.10067039728164673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,float16,0,0.10564960241317749
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,fp8,0,0.10103199481964112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,4,128,1,fp8,fp8,0,0.10119839906692504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,float16,0,0.11442879438400269
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,fp8,0,0.10251519680023194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,4,128,1,fp8,fp8,0,0.1845792055130005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,8,128,1,fp8,fp8,0,0.10263839960098267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,float16,0,0.07208639979362488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,fp8,0,0.06795200109481811
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,16,128,1,fp8,fp8,0,0.06745280027389526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,float16,0,0.06053119897842407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,fp8,0,0.06022080183029175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,1,128,1,fp8,fp8,0,0.0610975980758667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,float16,0,0.060070401430130003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,float16,0,0.10084960460662842
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,2,128,1,fp8,fp8,0,0.06052640080451965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,float16,0,0.061791998147964475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,fp8,0,0.06066079735755921
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,4,128,1,fp8,fp8,0,0.060343998670578006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,float16,0,0.0664799988269806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,fp8,0,0.060924798250198364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,8,128,1,fp8,fp8,0,0.06080160140991211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,fp8,0,0.04318079948425293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,16,128,1,fp8,fp8,0,0.04317440092563629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,float16,0,0.04004000127315521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,fp8,0,0.03920960128307342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,fp8,0,0.2139456033706665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,1,128,1,fp8,fp8,0,0.039150398969650266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,float16,0,0.04097599983215332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,fp8,0,0.039534398913383485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,2,128,1,fp8,fp8,0,0.039422398805618285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,float16,0,0.041233599185943604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,4,128,1,fp8,fp8,0,0.03921439945697784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,float16,0,0.04331679940223694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,fp8,0,0.039166399836540224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,8,128,1,fp8,fp8,0,0.039134401082992556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,float16,0,0.030969598889350893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,fp8,0,0.030926400423049928
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,16,128,1,fp8,fp8,0,0.030825600028038025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,float16,0,0.028886398673057555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,fp8,0,0.06154400110244751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,1,128,1,fp8,fp8,0,0.029039999842643736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,float16,0,0.029016000032424927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,fp8,0,0.028995200991630554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,float16,0,0.043300798535346983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,float16,0,0.02898559868335724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,fp8,0,0.028892800211906433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,4,128,1,fp8,fp8,0,0.02892799973487854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,float16,0,0.030852800607681273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,fp8,0,0.028881600499153136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,8,128,1,fp8,fp8,0,0.028935998678207397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,fp8,0,0.03931199908256531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,float16,0,0.6683695793151856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,fp8,0,0.7239776134490967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,1,128,1,fp8,fp8,0,0.7178207874298096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,fp8,0,0.028883200883865357
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,float16,0,0.6707968235015869
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,fp8,0,0.7184256076812744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,2,128,1,fp8,fp8,0,0.029047998785972595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,2,128,1,fp8,fp8,0,0.7146895885467529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,float16,0,0.7009488105773926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,4,128,1,fp8,fp8,0,0.7130512237548828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,float16,0,0.7800127983093261
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,fp8,0,0.7128928184509278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,float16,0,0.4796559810638428
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,8,128,1,fp8,fp8,0,0.7332191944122315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,fp8,0,0.44089441299438475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,16,128,1,fp8,fp8,0,0.44148478507995603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,float16,0,0.3455984115600586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,fp8,0,0.3747407913208008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,1,128,1,fp8,fp8,0,0.37378880977630613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,float16,0,0.34454879760742185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,fp8,0,0.7202896118164063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,fp8,0,0.37258400917053225
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,2,128,1,fp8,fp8,0,0.37298879623413084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,float16,0,0.37084639072418213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,fp8,0,0.37064480781555176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,4,128,1,fp8,fp8,0,0.36941120624542234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,float16,0,0.40540318489074706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,8,128,1,fp8,fp8,0,0.3694224119186401
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,float16,0,0.2517024040222168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,fp8,0,0.2320319890975952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,16,128,1,fp8,fp8,0,0.23353919982910157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,1,128,1,fp8,fp8,0,0.1987712025642395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,fp8,0,0.1990928053855896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,float16,0,0.1851248025894165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,fp8,0,0.19849599599838258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,2,128,1,fp8,fp8,0,0.19843839406967162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,float16,0,0.19786720275878905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,fp8,0,0.19925600290298462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,4,128,1,fp8,fp8,0,0.1990847945213318
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,fp8,0,0.19718400239944459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,8,128,1,fp8,fp8,0,0.19706079959869385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,float16,0,0.13679360151290892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,fp8,0,0.1270848035812378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,16,128,1,fp8,fp8,0,0.12622560262680055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,float16,0,0.18435360193252565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,float16,0,0.10406559705734253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,fp8,0,0.10471199750900269
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,1,128,1,fp8,fp8,0,0.10693119764328003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,float16,0,0.10134079456329345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,fp8,0,0.10714240074157715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,2,128,1,fp8,fp8,0,0.10554239749908448
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,float16,0,0.10892319679260254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,4,128,1,fp8,fp8,0,0.10730719566345215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,float16,0,0.11923840045928955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,float16,0,0.21457440853118898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,fp8,0,0.10839840173721313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,8,128,1,fp8,fp8,0,0.1082800030708313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,float16,0,0.07635520100593567
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,fp8,0,0.07064800262451172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,16,128,1,fp8,fp8,0,0.07263039946556091
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,float16,0,0.05823040008544922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,fp8,0,0.06103839874267578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,1,128,1,fp8,fp8,0,0.060227197408676145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,float16,0,0.059305602312088014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,fp8,0,0.05964959859848022
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,fp8,0,0.3697472095489502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,2,128,1,fp8,fp8,0,0.06065599918365479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,float16,0,0.060705602169036865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,fp8,0,0.05967040061950683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,4,128,1,fp8,fp8,0,0.06021440029144287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,float16,0,0.06700800061225891
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,fp8,0,0.05973759889602661
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,float16,0,0.044782400131225586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,fp8,0,0.0427264004945755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,16,128,1,fp8,fp8,0,0.0432096004486084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,float16,0,0.03707520067691803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,fp8,0,0.03912639915943146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,1,128,1,fp8,fp8,0,0.03807680010795593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,float16,0,0.03757280111312866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,fp8,0,0.03818880021572113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,2,128,1,fp8,fp8,0,0.03907679915428162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,float16,0,0.038980799913406375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,fp8,0,0.039052799344062805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,4,128,1,fp8,fp8,0,0.03892799913883209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,fp8,0,0.038929599523544314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,fp8,0,0.10520479679107667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,8,128,1,fp8,fp8,0,0.03886080086231232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,float16,0,0.02683520019054413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,fp8,0,0.026822400093078614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,16,128,1,fp8,fp8,0,0.026764801144599913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,float16,0,0.022935999929904936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,fp8,0,0.0247871994972229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,1,128,1,fp8,fp8,0,0.024774399399757386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,float16,0,0.024795199930667877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,fp8,0,0.02481440007686615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,2,128,1,fp8,fp8,0,0.02481919974088669
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,float16,0,0.024825599789619446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,fp8,0,0.02481440007686615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,4,128,1,fp8,fp8,0,0.024784000217914583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,float16,0,0.02528960108757019
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,fp8,0,0.024798400700092316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,float16,0,0.024697600305080412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,fp8,0,0.02476480007171631
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,16,128,1,fp8,fp8,0,0.024828800559043886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,float16,0,0.022731199860572815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,8,128,1,fp8,fp8,0,0.061080002784729005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,fp8,0,0.02292640060186386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,1,128,1,fp8,fp8,0,0.02290399968624115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,float16,0,0.023001599311828613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,fp8,0,0.023001599311828613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,float16,0,0.041233599185943604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,2,128,1,fp8,fp8,0,0.023214399814605713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,float16,0,0.022944000363349915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,fp8,0,0.023049600422382355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,float16,0,0.02478239983320236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,fp8,0,0.02306559979915619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,8,128,1,fp8,fp8,0,0.022892799973487855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,8,128,1,fp8,fp8,0,0.024825599789619446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,1,128,1,float16,float16,0,0.5000095844268799
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,1,128,1,float16,fp8,0,0.560814380645752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,1,128,1,fp8,fp8,0,0.5662208080291748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,2,128,1,float16,float16,0,0.4846831798553467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,2,128,1,float16,fp8,0,0.5629424095153809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,2,128,1,fp8,fp8,0,0.5583663940429687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,4,128,1,fp8,fp8,0,0.023003199696540834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,4,128,1,float16,float16,0,0.533793592453003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,4,128,1,float16,fp8,0,0.556502389907837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,4,128,1,fp8,fp8,0,0.5627024173736572
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,8,128,1,float16,fp8,0,0.5544960021972656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,8,128,1,float16,float16,0,0.6095808029174805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,16,128,1,float16,float16,0,0.39187040328979494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,16,128,1,float16,fp8,0,0.3601135969161987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,8,128,1,fp8,fp8,0,0.5549871921539307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,16,128,1,fp8,fp8,0,0.3631599903106689
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,1,128,1,float16,float16,0,0.2578223943710327
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,1,128,1,float16,fp8,0,0.29186720848083497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,1,128,1,fp8,fp8,0,0.2909071922302246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,2,128,1,float16,float16,0,0.25695679187774656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,2,128,1,float16,fp8,0,0.2946367979049683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,2,128,1,fp8,fp8,0,0.29085121154785154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,4,128,1,float16,float16,0,0.2715456008911133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,4,128,1,float16,fp8,0,0.2942239999771118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,4,128,1,fp8,fp8,0,0.29138400554656985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,8,128,1,float16,float16,0,0.309768009185791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,8,128,1,float16,fp8,0,0.287558388710022
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,16,128,1,float16,float16,0,0.20371360778808595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,16,128,1,float16,fp8,0,0.1885215997695923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,1,128,1,float16,float16,0,0.13623679876327516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,16,128,1,fp8,fp8,0,0.18833279609680176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,1,128,1,float16,fp8,0,0.15332640409469606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,1,128,1,fp8,fp8,0,0.15261919498443605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,2,128,1,float16,float16,0,0.13485599756240846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,2,128,1,float16,fp8,0,0.15256160497665405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,2,128,1,fp8,fp8,0,0.15222079753875734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,4,128,1,float16,fp8,0,0.15143680572509766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,4,128,1,fp8,fp8,0,0.15195679664611816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,8,128,1,float16,float16,0,0.16336480379104615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,8,128,1,float16,fp8,0,0.15160640478134155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,8,128,1,fp8,fp8,0,0.1515455961227417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,16,128,1,float16,float16,0,0.10803200006484985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,16,128,1,float16,fp8,0,0.10066879987716675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,16,128,1,fp8,fp8,0,0.10186879634857178
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,1,128,1,float16,float16,0,0.07415199875831605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,1,128,1,float16,fp8,0,0.08216480016708375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,1,128,1,fp8,fp8,0,0.08018239736557006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,2,128,1,float16,float16,0,0.07567359805107117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,2,128,1,float16,fp8,0,0.08131359815597534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,2,128,1,fp8,fp8,0,0.08217920064926147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,4,128,1,float16,fp8,0,0.08335360288619995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,4,128,1,fp8,fp8,0,0.08305760025978089
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,8,128,1,float16,float16,0,0.0909168004989624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,8,128,1,float16,fp8,0,0.08379520177841186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,8,128,1,fp8,fp8,0,0.29075040817260744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,8,128,1,fp8,fp8,0,0.08383039832115173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,16,128,1,float16,float16,0,0.06044800281524658
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,16,128,1,float16,fp8,0,0.05743839740753174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,1,128,1,float16,float16,0,0.04313920140266418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,1,128,1,float16,fp8,0,0.045556798577308655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,4,128,1,float16,float16,0,0.1438655972480774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,1,128,1,fp8,fp8,0,0.04548639953136444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,2,128,1,float16,float16,0,0.041889598965644835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,2,128,1,float16,fp8,0,0.04540480077266693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,2,128,1,fp8,fp8,0,0.04537439942359924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,4,128,1,float16,float16,0,0.0454479992389679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,4,128,1,float16,fp8,0,0.04536640048027039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,4,128,1,fp8,fp8,0,0.045535999536514285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,8,128,1,float16,float16,0,0.05161280035972595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,8,128,1,float16,fp8,0,0.04647040069103241
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,8,128,1,fp8,fp8,0,0.045788800716400145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,16,128,1,float16,fp8,0,0.03433600068092346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,16,128,1,fp8,fp8,0,0.034246399998664856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,1,128,1,float16,float16,0,0.028911998867988585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,1,128,1,float16,fp8,0,0.028990399837493897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,4,128,1,float16,float16,0,0.07943840026855468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,1,128,1,fp8,fp8,0,0.028908801078796387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,2,128,1,float16,float16,0,0.02839039862155914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,2,128,1,float16,fp8,0,0.028969600796699524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,2,128,1,fp8,fp8,0,0.029071998596191407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,4,128,1,float16,float16,0,0.028889599442481994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,4,128,1,float16,fp8,0,0.029016000032424927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,4,128,1,fp8,fp8,0,0.028992000222206115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,8,128,1,float16,float16,0,0.030969598889350893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,8,128,1,float16,fp8,0,0.028905600309371948
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,16,128,1,float16,float16,0,0.020657600462436677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,16,128,1,float16,fp8,0,0.02080959975719452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,16,128,1,fp8,fp8,0,0.05743839740753174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,16,128,1,fp8,fp8,0,0.02136159986257553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,1,128,1,float16,float16,0,0.019043199717998505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,1,128,1,float16,fp8,0,0.01889919936656952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,1,128,1,fp8,fp8,0,0.019145600497722626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,2,128,1,float16,float16,0,0.018691200017929076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,2,128,1,float16,fp8,0,0.018931199610233308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,16,128,1,float16,float16,0,0.0350383996963501
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,2,128,1,fp8,fp8,0,0.018831999599933626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,4,128,1,float16,float16,0,0.018812799453735353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,4,128,1,float16,fp8,0,0.01876319944858551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,4,128,1,fp8,fp8,0,0.018878400325775146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,8,128,1,float16,float16,0,0.020715199410915375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,8,128,1,float16,fp8,0,0.01873439997434616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,8,128,1,fp8,fp8,0,0.018849599361419677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,16,128,1,float16,float16,0,0.018649600446224213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,16,128,1,float16,fp8,0,0.018862399458885192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,16,128,1,fp8,fp8,0,0.018670399487018586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,1,128,1,float16,float16,0,0.016812799870967864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,1,128,1,float16,fp8,0,0.016756799817085267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,2,128,1,float16,float16,0,0.01671839952468872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,8,128,1,fp8,fp8,0,0.02887679934501648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,2,128,1,float16,fp8,0,0.016659200191497803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,2,128,1,fp8,fp8,0,0.016628800332546233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,4,128,1,float16,float16,0,0.017020800709724428
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,4,128,1,float16,fp8,0,0.016748799383640288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,4,128,1,fp8,fp8,0,0.017875200510025023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,8,128,1,float16,float16,0,0.017291200160980225
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,8,128,1,fp8,fp8,0,0.01669600009918213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,16,128,1,float16,float16,0,0.01677920073270798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,16,128,1,float16,fp8,0,0.016728000342845918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,16,128,1,fp8,fp8,0,0.016663999855518342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,1,128,1,float16,float16,0,0.016710400581359863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,1,128,1,float16,fp8,0,0.016697600483894348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,1,128,1,fp8,fp8,0,0.016740800440311433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,2,128,1,float16,float16,0,0.016740800440311433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,2,128,1,float16,fp8,0,0.016702400147914888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,4,128,1,float16,float16,0,0.016633599996566772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,2,128,1,fp8,fp8,0,0.017049600183963776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,4,128,1,float16,fp8,0,0.01663520038127899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,8,128,1,float16,float16,0,0.01666239947080612
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,8,128,1,float16,fp8,0,0.01658399999141693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,8,128,1,fp8,fp8,0,0.01659359931945801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,1,128,1,fp8,fp8,0,0.01682559996843338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,1,128,1,float16,float16,0,0.2046191930770874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,1,128,1,float16,fp8,0,0.24695041179656982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,1,128,1,fp8,fp8,0,0.24625918865203858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,2,128,1,float16,float16,0,0.20388479232788087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,8,128,1,float16,fp8,0,0.0182096004486084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,2,128,1,float16,fp8,0,0.24522719383239747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,2,128,1,fp8,fp8,0,0.24706559181213378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,4,128,1,float16,float16,0,0.22484960556030273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,4,128,1,float16,fp8,0,0.24417600631713868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,4,128,1,fp8,fp8,0,0.24531199932098388
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,8,128,1,float16,fp8,0,0.24279839992523194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,8,128,1,fp8,fp8,0,0.24339520931243896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,16,128,1,float16,float16,0,0.1760256052017212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,16,128,1,float16,fp8,0,0.1647600054740906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,16,128,1,fp8,fp8,0,0.16323519945144654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,1,128,1,float16,float16,0,0.11045119762420655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,1,128,1,float16,fp8,0,0.1290320038795471
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,1,128,1,fp8,fp8,0,0.12989280223846436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,2,128,1,float16,float16,0,0.10890400409698486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,2,128,1,float16,fp8,0,0.1299023985862732
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,2,128,1,fp8,fp8,0,0.12776000499725343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,4,128,1,float16,float16,0,0.12030880451202393
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,4,128,1,float16,fp8,0,0.1273311972618103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,4,128,1,fp8,fp8,0,0.13005919456481935
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,8,128,1,float16,float16,0,0.13754080533981322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,8,128,1,float16,fp8,0,0.12932319641113282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,8,128,1,fp8,fp8,0,0.1272271990776062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,16,128,1,float16,float16,0,0.09508479833602905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,16,128,1,float16,fp8,0,0.08634719848632813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,1,128,1,float16,float16,0,0.05973280072212219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,1,128,1,float16,fp8,0,0.06665440201759339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,1,128,1,fp8,fp8,0,0.06766880154609681
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,2,128,1,float16,float16,0,0.059841597080230714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,2,128,1,float16,fp8,0,0.06775360107421875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,2,128,1,fp8,fp8,0,0.06779519915580749
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,4,128,1,float16,float16,0,0.0646511971950531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,4,128,1,float16,fp8,0,0.06824960112571717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,4,128,1,fp8,fp8,0,0.06831200122833252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,8,128,1,float16,float16,0,0.07433120012283326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,8,128,1,float16,fp8,0,0.06845600008964539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,4,128,1,fp8,fp8,0,0.016689600050449373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,8,128,1,fp8,fp8,0,0.06793919801712037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,16,128,1,float16,float16,0,0.05349439978599548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,16,128,1,float16,fp8,0,0.04960319995880127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,16,128,1,fp8,fp8,0,0.049460801482200625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,1,128,1,float16,float16,0,0.03499679863452911
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,1,128,1,float16,fp8,0,0.03919839859008789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,1,128,1,fp8,fp8,0,0.03915199935436249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,2,128,1,float16,float16,0,0.03510560095310211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,2,128,1,float16,fp8,0,0.03920640051364899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,2,128,1,fp8,fp8,0,0.039124798774719236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,4,128,1,float16,float16,0,0.037539198994636536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,4,128,1,float16,fp8,0,0.039139199256896975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,4,128,1,fp8,fp8,0,0.03919520080089569
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,8,128,1,float16,float16,0,0.04352000057697296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,8,128,1,float16,fp8,0,0.03931199908256531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,8,128,1,fp8,fp8,0,0.039201599359512326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,16,128,1,float16,float16,0,0.027065598964691163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,16,128,1,float16,fp8,0,0.028916800022125246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,16,128,1,fp8,fp8,0,0.02908799946308136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,8,128,1,float16,float16,0,0.26288321018218996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,1,128,1,float16,float16,0,0.022305600345134735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,1,128,1,float16,fp8,0,0.025080001354217528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,2,128,1,float16,float16,0,0.022731199860572815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,2,128,1,float16,fp8,0,0.024905599653720856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,2,128,1,fp8,fp8,0,0.043905600905418396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,4,128,1,float16,float16,0,0.022865599393844603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,4,128,1,float16,fp8,0,0.024910399317741395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,4,128,1,fp8,fp8,0,0.0249551996588707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,8,128,1,float16,float16,0,0.0251120001077652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,8,128,1,float16,fp8,0,0.024886399507522583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,8,128,1,fp8,fp8,0,0.025102400779724122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,16,128,1,fp8,fp8,0,0.08692479729652405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,16,128,1,float16,float16,0,0.016923199594020843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,16,128,1,float16,fp8,0,0.01892800033092499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,16,128,1,fp8,fp8,0,0.018934400379657747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,1,128,1,float16,float16,0,0.014699199795722961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,1,128,1,float16,fp8,0,0.01687999963760376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,1,128,1,fp8,fp8,0,0.016764800250530242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,2,128,1,float16,fp8,0,0.016675199568271636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,2,128,1,fp8,fp8,0,0.016780799627304076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,1,128,1,fp8,fp8,0,0.02481919974088669
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,4,128,1,float16,float16,0,0.016524800658226015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,4,128,1,fp8,fp8,0,0.016675199568271636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,4,128,1,float16,fp8,0,0.016638399660587312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,8,128,1,float16,float16,0,0.01674239933490753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,8,128,1,float16,fp8,0,0.01669919937849045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,8,128,1,fp8,fp8,0,0.016700799763202667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,16,128,1,float16,fp8,0,0.016579200327396394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,16,128,1,fp8,fp8,0,0.016663999855518342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,1,128,1,float16,float16,0,0.014708800613880158
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,1,128,1,float16,fp8,0,0.014748799800872802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,1,128,1,fp8,fp8,0,0.014734399318695069
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,2,128,1,float16,float16,0,0.014584000408649444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,2,128,1,float16,fp8,0,0.014691199362277984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,2,128,1,fp8,fp8,0,0.014703999459743499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,4,128,1,float16,float16,0,0.014577600359916686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,4,128,1,fp8,fp8,0,0.014716799557209014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,4,128,1,float16,fp8,0,0.015080000460147857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,8,128,1,float16,float16,0,0.01491519957780838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,8,128,1,fp8,fp8,0,0.014750400185585022
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,8,128,1,float16,fp8,0,0.0150751993060112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,16,128,1,float16,float16,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,16,128,1,fp8,fp8,0,0.01462559998035431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,16,128,1,float16,fp8,0,0.014833599328994751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,1,128,1,float16,float16,0,0.014502400159835815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,1,128,1,fp8,fp8,0,0.014382399618625641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,1,128,1,float16,fp8,0,0.014798399806022645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,2,128,1,float16,float16,0,0.014567999541759491
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,2,128,1,fp8,fp8,0,0.01446560025215149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,4,128,1,float16,float16,0,0.01451520025730133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,2,128,1,float16,fp8,0,0.015060800313949584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,4,128,1,float16,fp8,0,0.014580799639225006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,8,128,1,float16,float16,0,0.014659200608730317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,4,128,1,fp8,fp8,0,0.014820800721645355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,8,128,1,float16,fp8,0,0.014660799503326416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,16,128,1,float16,float16,0,0.014553600549697876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,8,128,1,fp8,fp8,0,0.014800000190734863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,16,128,1,float16,float16,0,0.014660799503326416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,16,128,1,fp8,fp8,0,0.014772799611091614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,1,128,1,float16,float16,0,0.014659200608730317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,1,128,1,float16,fp8,0,0.013952000439167023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,2,128,1,float16,float16,0,0.014744000136852264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,2,128,1,float16,fp8,0,0.014771200716495514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,2,128,1,fp8,fp8,0,0.01586720049381256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,4,128,1,float16,float16,0,0.014763200283050537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,4,128,1,float16,fp8,0,0.014727999269962311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,4,128,1,fp8,fp8,0,0.01481119990348816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,8,128,1,float16,float16,0,0.014824000000953675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,8,128,1,float16,fp8,0,0.0127920001745224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,8,128,1,fp8,fp8,0,0.012788799405097962
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,1,128,1,float16,float16,0,0.12899359464645385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,2,128,1,float16,float16,0,0.014947199821472168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,1,128,1,float16,fp8,0,0.14798400402069092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,1,128,1,fp8,fp8,0,0.1486575961112976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,2,128,1,float16,float16,0,0.1280624032020569
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,2,128,1,float16,fp8,0,0.1487936019897461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,2,128,1,fp8,fp8,0,0.14808160066604614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,16,128,1,float16,fp8,0,0.014643199741840363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,4,128,1,float16,float16,0,0.1390112042427063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,4,128,1,float16,fp8,0,0.1474496006965637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,8,128,1,float16,float16,0,0.15775680541992188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,8,128,1,float16,fp8,0,0.14651999473571778
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,8,128,1,fp8,fp8,0,0.14818880558013917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,16,128,1,float16,float16,0,0.10175520181655884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,16,128,1,float16,fp8,0,0.09721599817276001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,16,128,1,fp8,fp8,0,0.09676480293273926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,1,128,1,float16,float16,0,0.0705407977104187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,1,128,1,float16,fp8,0,0.07816479802131653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,1,128,1,fp8,fp8,0,0.07832159996032714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,2,128,1,float16,float16,0,0.07017279863357544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,2,128,1,float16,fp8,0,0.0789359986782074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,4,128,1,float16,float16,0,0.07607679963111877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,4,128,1,float16,fp8,0,0.08028479814529418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,4,128,1,fp8,fp8,0,0.08016960024833679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,8,128,1,float16,float16,0,0.08495839834213256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,1,128,1,fp8,fp8,0,0.012606400251388549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,4,128,1,fp8,fp8,0,0.14880479574203492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,8,128,1,float16,fp8,0,0.07830399870872498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,8,128,1,fp8,fp8,0,0.08027999997138976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,16,128,1,float16,float16,0,0.054179197549819945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,16,128,1,float16,fp8,0,0.05351679921150208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,1,128,1,float16,float16,0,0.03915199935436249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,1,128,1,float16,fp8,0,0.043201598525047305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,1,128,1,fp8,fp8,0,0.04324159920215607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,2,128,1,float16,float16,0,0.039208000898361205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,2,128,1,float16,fp8,0,0.04328480064868927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,2,128,1,fp8,fp8,0,0.07816479802131653
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,2,128,1,fp8,fp8,0,0.043249601125717164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,4,128,1,float16,float16,0,0.041464000940322876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,4,128,1,fp8,fp8,0,0.04239839911460876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,8,128,1,float16,float16,0,0.04743039906024933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,8,128,1,float16,fp8,0,0.04341759979724884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,8,128,1,fp8,fp8,0,0.043347200751304625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,16,128,1,float16,float16,0,0.031040000915527343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,16,128,1,float16,fp8,0,0.031094399094581605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,16,128,1,fp8,fp8,0,0.031121599674224853
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,1,128,1,float16,float16,0,0.02492160052061081
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,1,128,1,float16,fp8,0,0.027054399251937866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,16,128,1,fp8,fp8,0,0.051545602083206174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,1,128,1,fp8,fp8,0,0.026830399036407472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,2,128,1,float16,float16,0,0.024828800559043886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,2,128,1,float16,fp8,0,0.02689119875431061
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,2,128,1,fp8,fp8,0,0.02689119875431061
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,4,128,1,float16,float16,0,0.024823999404907225
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,4,128,1,float16,fp8,0,0.026822400093078614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,4,128,1,fp8,fp8,0,0.02686080038547516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,8,128,1,float16,float16,0,0.026939201354980468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,4,128,1,float16,fp8,0,0.04336479902267456
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,8,128,1,fp8,fp8,0,0.026875200867652892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,16,128,1,float16,float16,0,0.01871519982814789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,16,128,1,float16,fp8,0,0.018718400597572328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,16,128,1,fp8,fp8,0,0.01973759979009628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,1,128,1,float16,fp8,0,0.016686399281024934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,1,128,1,float16,float16,0,0.016680000722408293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,2,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,2,128,1,float16,fp8,0,0.016771200299263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,2,128,1,fp8,fp8,0,0.016729600727558136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,4,128,1,float16,float16,0,0.01672320067882538
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,4,128,1,float16,fp8,0,0.016607999801635742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,8,128,1,float16,float16,0,0.016732800006866454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,8,128,1,float16,fp8,0,0.016595199704170227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,8,128,1,fp8,fp8,0,0.01661760061979294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,16,128,1,float16,float16,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,16,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,16,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,1,128,1,float16,float16,0,0.012439999729394913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,1,128,1,float16,fp8,0,0.012467200309038163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,1,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,2,128,1,float16,float16,0,0.01252480000257492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,2,128,1,float16,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,2,128,1,fp8,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,4,128,1,float16,float16,0,0.012488000094890594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,4,128,1,float16,fp8,0,0.012651200592517852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,8,128,1,float16,fp8,0,0.0268528014421463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,4,128,1,fp8,fp8,0,0.01255359947681427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,8,128,1,float16,float16,0,0.012484800070524216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,8,128,1,float16,fp8,0,0.012516799569129943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,8,128,1,fp8,fp8,0,0.012486399710178375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,16,128,1,float16,float16,0,0.012492799758911132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,16,128,1,float16,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,16,128,1,fp8,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,1,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,1,128,1,fp8,fp8,0,0.01866399943828583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,1,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,2,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,2,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,4,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,4,128,1,float16,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,4,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,8,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,8,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,8,128,1,fp8,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,16,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,16,128,1,float16,fp8,0,0.01180799975991249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,16,128,1,fp8,fp8,0,0.010635200142860412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,1,128,1,fp8,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,2,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,2,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,2,128,1,fp8,fp8,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,4,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,4,128,1,float16,fp8,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,4,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,8,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,8,128,1,float16,fp8,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,8,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,16,128,1,float16,float16,0,0.010598400235176086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,16,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,16,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,1,128,1,float16,float16,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,1,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,2,128,1,float16,float16,0,0.01236959993839264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,1,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,2,128,1,float16,float16,0,0.010734400153160096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,2,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,4,128,1,fp8,fp8,0,0.016732800006866454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,4,128,1,float16,float16,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,4,128,1,float16,fp8,0,0.010564800351858139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,1,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,4,128,1,fp8,fp8,0,0.010590399801731109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,8,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,8,128,1,float16,fp8,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,8,128,1,fp8,fp8,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,1,128,1,float16,fp8,0,0.11292799711227416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,1,128,1,float16,float16,0,0.10731680393218994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,1,128,1,fp8,fp8,0,0.11296960115432739
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,2,128,1,float16,float16,0,0.10740799903869629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,2,128,1,float16,fp8,0,0.11310399770736694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,2,128,1,fp8,fp8,0,0.1142416000366211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,4,128,1,float16,float16,0,0.11125760078430176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,4,128,1,float16,fp8,0,0.11410399675369262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,4,128,1,fp8,fp8,0,0.11304160356521606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,8,128,1,float16,float16,0,0.12117919921875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,8,128,1,float16,fp8,0,0.11285439729690552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,8,128,1,fp8,fp8,0,0.11314239501953124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,16,128,1,float16,float16,0,0.0718288004398346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,16,128,1,float16,fp8,0,0.07185919880867005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,1,128,1,float16,float16,0,0.058764797449111936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,1,128,1,float16,fp8,0,0.06105120182037353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,1,128,1,fp8,fp8,0,0.06134080290794373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,2,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,2,128,1,float16,float16,0,0.05778719782829285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,2,128,1,float16,fp8,0,0.059614402055740354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,2,128,1,fp8,fp8,0,0.0613647997379303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,4,128,1,float16,float16,0,0.05979200005531311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,4,128,1,float16,fp8,0,0.06174719929695129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,4,128,1,fp8,fp8,0,0.059601598978042604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,8,128,1,float16,float16,0,0.06619200110435486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,8,128,1,fp8,fp8,0,0.06164640188217163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,16,128,1,float16,float16,0,0.03922559916973114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,16,128,1,float16,fp8,0,0.03913280069828033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,16,128,1,fp8,fp8,0,0.0391072005033493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,1,128,1,float16,float16,0,0.03303839862346649
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,1,128,1,float16,fp8,0,0.0349263995885849
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,1,128,1,fp8,fp8,0,0.03488959968090057
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,2,128,1,float16,float16,0,0.03313600122928619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,2,128,1,float16,fp8,0,0.03504799902439117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,16,128,1,fp8,fp8,0,0.06984320282936096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,4,128,1,float16,float16,0,0.03329919874668121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,2,128,1,fp8,fp8,0,0.03531199991703034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,4,128,1,float16,fp8,0,0.03329440057277679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,4,128,1,fp8,fp8,0,0.035017600655555724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,8,128,1,float16,fp8,0,0.033206400275230405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,8,128,1,float16,float16,0,0.036289599537849423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,16,128,1,float16,float16,0,0.022878399491310118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,16,128,1,float16,fp8,0,0.024801599979400634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,16,128,1,fp8,fp8,0,0.02476799935102463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,1,128,1,float16,float16,0,0.020883199572563172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,1,128,1,float16,fp8,0,0.02266560047864914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,1,128,1,fp8,fp8,0,0.022782400250434875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,2,128,1,float16,float16,0,0.020895999670028687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,2,128,1,float16,fp8,0,0.02264000028371811
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,2,128,1,fp8,fp8,0,0.022646400332450866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,4,128,1,float16,float16,0,0.023003199696540834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,8,128,1,float16,fp8,0,0.06025599837303162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,4,128,1,fp8,fp8,0,0.022708800435066224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,8,128,1,float16,float16,0,0.022753599286079406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,8,128,1,float16,fp8,0,0.022657600045204163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,8,128,1,fp8,fp8,0,0.022702400386333466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,16,128,1,float16,float16,0,0.01669600009918213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,16,128,1,float16,fp8,0,0.016705599427223206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,16,128,1,fp8,fp8,0,0.016673600673675536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,1,128,1,float16,fp8,0,0.014856000244617463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,1,128,1,fp8,fp8,0,0.014632000029087067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,2,128,1,float16,float16,0,0.014684799313545226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,2,128,1,float16,fp8,0,0.015460799634456634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,2,128,1,fp8,fp8,0,0.015836800634860992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,4,128,1,float16,float16,0,0.014703999459743499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,4,128,1,float16,fp8,0,0.014630399644374847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,4,128,1,fp8,fp8,0,0.014636799693107605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,8,128,1,float16,float16,0,0.016358399391174318
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,8,128,1,fp8,fp8,0,0.03495360016822815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,8,128,1,fp8,fp8,0,0.014734399318695069
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,8,128,1,float16,fp8,0,0.014679999649524688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,16,128,1,float16,float16,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,16,128,1,fp8,fp8,0,0.012529599666595458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,1,128,1,float16,float16,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,1,128,1,float16,fp8,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,4,128,1,float16,fp8,0,0.022742399573326112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,2,128,1,float16,float16,0,0.011828800290822982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,2,128,1,float16,fp8,0,0.012465599924325943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,2,128,1,fp8,fp8,0,0.012417600303888322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,4,128,1,float16,float16,0,0.011931200325489045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,4,128,1,float16,fp8,0,0.012486399710178375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,4,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,8,128,1,float16,float16,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,8,128,1,float16,fp8,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,8,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,16,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,16,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,16,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,1,128,1,float16,float16,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,1,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,1,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,16,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,2,128,1,float16,float16,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,2,128,1,float16,fp8,0,0.01067200005054474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,2,128,1,fp8,fp8,0,0.010652799904346467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,4,128,1,float16,float16,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,4,128,1,float16,fp8,0,0.010707200318574906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,4,128,1,fp8,fp8,0,0.010807999968528747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,8,128,1,float16,float16,0,0.011057599633932113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,1,128,1,float16,float16,0,0.01451839953660965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,8,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,8,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,16,128,1,float16,float16,0,0.010678400099277497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,16,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,1,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,1,128,1,float16,fp8,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,1,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,2,128,1,float16,float16,0,0.01064639985561371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,2,128,1,float16,fp8,0,0.010664000362157821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,2,128,1,fp8,fp8,0,0.010684800148010255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,4,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,4,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,4,128,1,fp8,fp8,0,0.010655999928712846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,8,128,1,float16,float16,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,8,128,1,float16,fp8,0,0.010659199953079224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,8,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,16,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,16,128,1,float16,fp8,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,1,128,1,fp8,fp8,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,16,128,1,fp8,fp8,0,0.010644800215959548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,1,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,1,128,1,fp8,fp8,0,0.010731200128793717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,2,128,1,float16,float16,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,2,128,1,float16,fp8,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,4,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,16,128,1,float16,fp8,0,0.010625600069761276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,4,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,4,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,8,128,1,float16,float16,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,8,128,1,float16,fp8,0,0.010332799702882766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,8,128,1,fp8,fp8,0,0.010764800012111664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,1,128,1,float16,float16,0,0.09261919856071472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,2,128,1,float16,float16,0,0.09274880290031433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,1,128,1,float16,fp8,0,0.09470080137252808
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,1,128,1,fp8,fp8,0,0.09511680006980897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,2,128,1,float16,fp8,0,0.09456959962844849
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,2,128,1,fp8,fp8,0,0.09517760276794433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,4,128,1,float16,float16,0,0.09675999879837036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,4,128,1,float16,fp8,0,0.09463520050048828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,4,128,1,fp8,fp8,0,0.09489279985427856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,8,128,1,float16,float16,0,0.10140000581741333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,8,128,1,float16,fp8,0,0.09460319876670838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,8,128,1,fp8,fp8,0,0.09506880044937134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,16,128,1,float16,float16,0,0.05803040266036987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,16,128,1,float16,fp8,0,0.05560160279273987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,1,128,1,float16,float16,0,0.050393599271774295
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,1,128,1,float16,fp8,0,0.051451200246810914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,1,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,1,128,1,fp8,fp8,0,0.05171040296554565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,2,128,1,float16,float16,0,0.051718401908874514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,2,128,1,float16,fp8,0,0.05145279765129089
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,2,128,1,fp8,fp8,0,0.05180320143699646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,4,128,1,float16,float16,0,0.05178719758987427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,4,128,1,float16,fp8,0,0.05146399736404419
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,4,128,1,fp8,fp8,0,0.0514959990978241
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,8,128,1,float16,float16,0,0.0538640022277832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,8,128,1,float16,fp8,0,0.051520001888275144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,16,128,1,float16,float16,0,0.03174079954624176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,16,128,1,float16,fp8,0,0.032948800921440126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,16,128,1,fp8,fp8,0,0.031222400069236756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,1,128,1,float16,float16,0,0.030964800715446474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,1,128,1,float16,fp8,0,0.029271999001502992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,1,128,1,fp8,fp8,0,0.03087199926376343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,2,128,1,float16,float16,0,0.029203200340270997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,2,128,1,float16,fp8,0,0.030830401182174682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,2,128,1,fp8,fp8,0,0.029044800996780397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,4,128,1,float16,float16,0,0.03087199926376343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,4,128,1,fp8,fp8,0,0.030823999643325807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,8,128,1,float16,float16,0,0.0311055988073349
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,8,128,1,float16,fp8,0,0.030844798684120177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,8,128,1,fp8,fp8,0,0.030988800525665283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,16,128,1,float16,float16,0,0.020803199708461763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,16,128,1,float16,fp8,0,0.02091519981622696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,16,128,1,fp8,fp8,0,0.020822399854660036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,16,128,1,fp8,fp8,0,0.057062399387359616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,1,128,1,float16,float16,0,0.020604799687862396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,1,128,1,float16,fp8,0,0.02069759964942932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,1,128,1,fp8,fp8,0,0.020619200170040132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,2,128,1,float16,float16,0,0.02059520035982132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,2,128,1,float16,fp8,0,0.020694400370121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,2,128,1,fp8,fp8,0,0.02069920003414154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,4,128,1,float16,float16,0,0.020657600462436677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,4,128,1,float16,fp8,0,0.020614400506019592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,4,128,1,fp8,fp8,0,0.020630399882793426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,8,128,1,float16,float16,0,0.020796799659729005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,8,128,1,float16,fp8,0,0.020695999264717102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,8,128,1,fp8,fp8,0,0.020678399503231047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,16,128,1,float16,float16,0,0.014608000218868256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,8,128,1,fp8,fp8,0,0.0516431987285614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,16,128,1,fp8,fp8,0,0.014727999269962311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,1,128,1,float16,float16,0,0.014545600116252898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,1,128,1,float16,fp8,0,0.014646400511264802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,1,128,1,fp8,fp8,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,2,128,1,float16,float16,0,0.014679999649524688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,2,128,1,float16,fp8,0,0.014521600306034088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,4,128,1,float16,fp8,0,0.029153600335121155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,2,128,1,fp8,fp8,0,0.01467359960079193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,4,128,1,float16,float16,0,0.014636799693107605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,4,128,1,float16,fp8,0,0.014689600467681885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,4,128,1,fp8,fp8,0,0.014574399590492249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,8,128,1,float16,float16,0,0.014686399698257446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,8,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,8,128,1,fp8,fp8,0,0.014711999893188476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,16,128,1,float16,fp8,0,0.010689599812030793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,16,128,1,fp8,fp8,0,0.010702399909496308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,1,128,1,float16,float16,0,0.0108255997300148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,1,128,1,float16,fp8,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,1,128,1,fp8,fp8,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,2,128,1,float16,float16,0,0.01061599999666214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,2,128,1,float16,fp8,0,0.010625600069761276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,16,128,1,float16,fp8,0,0.014697599411010741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,2,128,1,fp8,fp8,0,0.010779199749231338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,4,128,1,float16,float16,0,0.010737600177526474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,4,128,1,float16,fp8,0,0.010689599812030793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,4,128,1,fp8,fp8,0,0.010732799768447876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,8,128,1,float16,fp8,0,0.010705599933862687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,8,128,1,float16,float16,0,0.010678400099277497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,8,128,1,fp8,fp8,0,0.010667199641466141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,16,128,1,float16,float16,0,0.01066880002617836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,16,128,1,fp8,fp8,0,0.010628800094127654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,1,128,1,float16,float16,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,1,128,1,float16,fp8,0,0.010595200210809707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,1,128,1,fp8,fp8,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,16,128,1,float16,float16,0,0.011451199650764465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,2,128,1,float16,fp8,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,2,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,4,128,1,float16,float16,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,4,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,4,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,8,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,8,128,1,float16,fp8,0,0.010344000160694122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,8,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,16,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,16,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,16,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,1,128,1,float16,fp8,0,0.011132799834012986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,1,128,1,fp8,fp8,0,0.010639999806880952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,2,128,1,float16,float16,0,0.010590399801731109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,2,128,1,float16,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,2,128,1,fp8,fp8,0,0.010567999631166457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,4,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,16,128,1,float16,fp8,0,0.010729599744081497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,4,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,8,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,8,128,1,float16,fp8,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,8,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,2,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,16,128,1,float16,fp8,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,16,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,1,128,1,float16,float16,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,1,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,1,128,1,fp8,fp8,0,0.010979200154542923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,2,128,1,float16,float16,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,1,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,2,128,1,float16,fp8,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,2,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,4,128,1,float16,float16,0,0.010609599947929382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,4,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,4,128,1,fp8,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,8,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,8,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,1,128,1,float16,float16,0,0.08870880007743835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,4,128,1,fp8,fp8,0,0.01064160019159317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,1,128,1,fp8,fp8,0,0.08644480109214783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,2,128,1,float16,float16,0,0.08972319960594177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,2,128,1,float16,fp8,0,0.0866096019744873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,2,128,1,fp8,fp8,0,0.08649280071258544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,4,128,1,float16,float16,0,0.09094560146331787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,4,128,1,float16,fp8,0,0.08643199801445008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,4,128,1,fp8,fp8,0,0.08635200262069702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,8,128,1,float16,float16,0,0.09431840181350708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,8,128,1,float16,fp8,0,0.0864799976348877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,8,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,8,128,1,fp8,fp8,0,0.08649280071258544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,16,128,1,float16,float16,0,0.05149440169334411
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,16,128,1,float16,fp8,0,0.05141760110855102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,16,128,1,fp8,fp8,0,0.050988799333572386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,1,128,1,float16,float16,0,0.0498416006565094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,1,128,1,float16,fp8,0,0.04816479980945587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,1,128,1,fp8,fp8,0,0.04901759922504425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,2,128,1,float16,float16,0,0.04955039918422699
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,2,128,1,float16,fp8,0,0.04912799894809723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,2,128,1,fp8,fp8,0,0.047993600368499756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,1,128,1,float16,fp8,0,0.08663520216941833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,4,128,1,float16,float16,0,0.05101119875907898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,4,128,1,float16,fp8,0,0.047907200455665586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,8,128,1,float16,float16,0,0.051692801713943484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,8,128,1,float16,fp8,0,0.048003199696540835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,8,128,1,fp8,fp8,0,0.04755200147628784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,16,128,1,float16,float16,0,0.03091680109500885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,16,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,16,128,1,float16,fp8,0,0.02905279994010925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,16,128,1,fp8,fp8,0,0.028960001468658448
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,1,128,1,float16,float16,0,0.02919679880142212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,1,128,1,float16,fp8,0,0.02895039916038513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,1,128,1,fp8,fp8,0,0.028918400406837463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,2,128,1,float16,fp8,0,0.02895039916038513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,2,128,1,fp8,fp8,0,0.028964799642562867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,4,128,1,float16,float16,0,0.030456000566482545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,4,128,1,float16,fp8,0,0.02908959984779358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,4,128,1,fp8,fp8,0,0.02890079915523529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,8,128,1,float16,float16,0,0.0310479998588562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,8,128,1,float16,fp8,0,0.028883200883865357
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,8,128,1,fp8,fp8,0,0.02903839945793152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,16,128,1,float16,float16,0,0.02075359970331192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,16,128,1,float16,fp8,0,0.02082560062408447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,16,128,1,fp8,fp8,0,0.02038239985704422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,4,128,1,fp8,fp8,0,0.047547200322151185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,1,128,1,float16,float16,0,0.020654399693012238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,1,128,1,float16,fp8,0,0.018724800646305086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,1,128,1,fp8,fp8,0,0.018880000710487364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,2,128,1,float16,float16,0,0.018739199638366698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,2,128,1,float16,fp8,0,0.01884479969739914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,2,128,1,fp8,fp8,0,0.018742400407791137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,4,128,1,float16,float16,0,0.02080000042915344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,4,128,1,float16,fp8,0,0.01876319944858551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,2,128,1,float16,float16,0,0.029172798991203307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,8,128,1,float16,float16,0,0.020798400044441223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,8,128,1,fp8,fp8,0,0.018801599740982056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,8,128,1,float16,fp8,0,0.018892799317836762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,16,128,1,float16,float16,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,16,128,1,float16,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,16,128,1,fp8,fp8,0,0.014696000516414643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,1,128,1,float16,float16,0,0.014404800534248353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,1,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,2,128,1,float16,float16,0,0.013288000226020813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,2,128,1,float16,fp8,0,0.012731200456619263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,2,128,1,fp8,fp8,0,0.012656000256538392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,4,128,1,float16,float16,0,0.01420000046491623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,4,128,1,float16,fp8,0,0.012671999633312225
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,4,128,1,fp8,fp8,0,0.012708799540996551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,8,128,1,float16,float16,0,0.014646400511264802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,8,128,1,float16,fp8,0,0.014294399321079254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,8,128,1,fp8,fp8,0,0.014684799313545226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,16,128,1,float16,float16,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,16,128,1,float16,fp8,0,0.010611200332641601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,4,128,1,fp8,fp8,0,0.0188511997461319
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,16,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,1,128,1,float16,float16,0,0.010593599826097488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,1,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,1,128,1,fp8,fp8,0,0.010649599879980088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,2,128,1,float16,float16,0,0.010705599933862687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,2,128,1,float16,fp8,0,0.010771200060844421
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,4,128,1,float16,float16,0,0.010679999738931656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,1,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,4,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,4,128,1,fp8,fp8,0,0.010715200006961823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,8,128,1,float16,fp8,0,0.010664000362157821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,8,128,1,fp8,fp8,0,0.010632000118494033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,16,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,16,128,1,float16,fp8,0,0.010652799904346467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,16,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,1,128,1,float16,float16,0,0.010564800351858139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,1,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,1,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,2,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,2,128,1,float16,fp8,0,0.01061599999666214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,2,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,4,128,1,float16,fp8,0,0.010649599879980088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,4,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,4,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,8,128,1,float16,float16,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,8,128,1,float16,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,8,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,16,128,1,float16,float16,0,0.010654400289058685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,16,128,1,float16,fp8,0,0.010655999928712846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,16,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,1,128,1,float16,float16,0,0.010700800269842149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,1,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,2,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,2,128,1,float16,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,2,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,4,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,4,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,4,128,1,fp8,fp8,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,8,128,1,float16,float16,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,8,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,8,128,1,fp8,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,16,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,8,128,1,float16,float16,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,16,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,16,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,1,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,1,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,2,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,2,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,2,128,1,fp8,fp8,0,0.010595200210809707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,4,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,4,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,4,128,1,fp8,fp8,0,0.01058719977736473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,8,128,1,float16,float16,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,8,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,8,128,1,fp8,fp8,0,0.010567999631166457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,0,0.08857280015945435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,0,0.08221279978752136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,1,128,1,fp8,fp8,0,0.08223999738693237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,0,0.08764479756355285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,0,0.0820367991924286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,2,128,1,fp8,fp8,0,0.08225600123405456
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,2,128,1,fp8,fp8,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,0,0.08841919898986816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,4,128,1,fp8,fp8,0,0.08224800229072571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,0,0.082259202003479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,0,0.08841599822044373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,0,0.08221279978752136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,1,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,0,0.04720160067081451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,16,128,1,fp8,fp8,0,0.047356799244880676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,0,0.04963360130786896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,0,0.04677920043468475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,1,128,1,fp8,fp8,0,0.04637120068073273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,0,0.04949440062046051
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,0,0.04647200107574463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,2,128,1,fp8,fp8,0,0.046351999044418335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,0,0.049558401107788086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,0,0.045505601167678836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,4,128,1,fp8,fp8,0,0.045393601059913635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,0,0.049502399563789365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,0,0.04538559913635254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,8,128,1,fp8,fp8,0,0.045638400316238406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,0,0.029028800129890443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,0,0.027011200785636902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,0,0.049351999163627626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,0,0.02890399992465973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,16,128,1,fp8,fp8,0,0.027009600400924684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,0,0.026958400011062623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,1,128,1,fp8,fp8,0,0.026921600103378296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,0,0.029028800129890443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,0,0.027031999826431275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,0,0.029016000032424927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,0,0.026921600103378296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,4,128,1,fp8,fp8,0,0.027088001370429993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,0,0.02908160090446472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,0,0.02689119875431061
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,8,128,1,fp8,fp8,0,0.026979199051856993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,0,0.018764799833297728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,0,0.018775999546051025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,16,128,1,fp8,fp8,0,0.018806399405002595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,0,0.018798400461673737
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,0,0.018539200723171233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,1,128,1,fp8,fp8,0,0.018587200343608855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,0,0.01908160001039505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,0,0.01870400011539459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,2,128,1,fp8,fp8,0,0.01873439997434616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,0,0.01886560022830963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,0,0.01873439997434616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,4,128,1,fp8,fp8,0,0.01870719939470291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,0,0.019460800290107726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,0,0.018755200505256652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,8,128,1,fp8,fp8,0,0.018755200505256652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,0,0.014580799639225006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,16,128,1,fp8,fp8,0,0.014532800018787383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,0,0.014684799313545226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,0,0.014569599926471711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,1,128,1,fp8,fp8,0,0.01456640064716339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,0,0.014679999649524688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,2,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,0,0.01459999978542328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,0,0.012803199887275695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,8,128,1,fp8,fp8,0,0.08228800296783448
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,4,128,1,fp8,fp8,0,0.014420799911022186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,0,0.014392000436782838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,0,0.014484800398349762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,8,128,1,fp8,fp8,0,0.01430879980325699
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,0,0.010953599959611893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,0,0.010627199709415436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,16,128,1,fp8,fp8,0,0.010732799768447876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,0,0.011475200206041336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,1,128,1,fp8,fp8,0,0.010583999752998351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,2,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,0,0.01241919994354248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,4,128,1,fp8,fp8,0,0.010583999752998351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,0,0.011825600266456604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,8,128,1,fp8,fp8,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,16,128,1,fp8,fp8,0,0.01077599972486496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,0,0.010636799782514573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,1,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,0,0.01066880002617836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,2,128,1,fp8,fp8,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,0,0.013107199966907502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,4,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,8,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,2,128,1,fp8,fp8,0,0.02688960134983063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,16,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,1,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,2,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,4,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,8,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,16,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,1,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,2,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,0,0.010364799946546554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,4,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,8,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,fp8,0,4.3174785614013675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,1,128,1,fp8,fp8,0,4.218814468383789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,float16,0,6.045993423461914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,float16,0,5.8034721374511715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,fp8,0,4.366424179077148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,2,128,1,fp8,fp8,0,4.14685287475586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,float16,0,6.658257293701172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,fp8,0,4.316984176635742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,4,128,1,fp8,fp8,0,4.449257659912109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,float16,0,3.082815933227539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,fp8,0,2.1405567169189452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,fp8,0,2.1694623947143556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,12,128,1,fp8,fp8,0,2.1576080322265625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,1,128,1,fp8,fp8,0,2.0729248046875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,float16,0,2.5484687805175783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,fp8,0,2.167094421386719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,2,128,1,fp8,fp8,0,2.0789888381958006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,fp8,0,2.1525264739990235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,4,128,1,fp8,fp8,0,2.0891904830932617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,float16,0,4.174635314941407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,fp8,0,1.1957615852355956
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,float16,0,1.2234288215637208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,fp8,0,1.1308688163757323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,1,128,1,fp8,fp8,0,1.1042112350463866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,12,128,1,fp8,fp8,0,1.324619197845459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,float16,0,1.2239007949829102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,fp8,0,1.1064847946166991
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,2,128,1,fp8,fp8,0,1.4385104179382324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,float16,0,1.2489935874938964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,fp8,0,1.1012864112854004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,4,128,1,fp8,fp8,0,1.2663439750671386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,fp8,0,0.6388559818267823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,12,128,1,fp8,fp8,0,0.6376880168914795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,float16,0,0.691593599319458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,fp8,0,0.717145586013794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,1,128,1,fp8,fp8,0,0.6208896160125732
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,float16,0,0.692464017868042
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,fp8,0,0.619761610031128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,2,128,1,fp8,fp8,0,0.6192319869995118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,float16,0,0.6983776092529297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,fp8,0,0.618993616104126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,4,128,1,fp8,fp8,0,0.6207664012908936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,float16,0,3.6856510162353517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,fp8,0,2.4683696746826174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,1,128,1,fp8,fp8,0,2.4236303329467774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,float16,0,3.033404731750488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,fp8,0,2.4345935821533202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,2,128,1,fp8,fp8,0,2.433238410949707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,float16,0,3.7300704956054687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,fp8,0,2.715255928039551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,4,128,1,fp8,fp8,0,2.587131118774414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,float16,0,1.3154959678649902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,float16,0,2.7973936080932615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,fp8,0,1.3077072143554687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,float16,0,0.7124224185943604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,12,128,1,fp8,fp8,0,1.3249600410461426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,float16,0,1.4362768173217773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,float16,0,1.4102751731872558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,fp8,0,1.240995216369629
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,1,128,1,fp8,fp8,0,1.2569567680358886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,float16,0,1.7620399475097657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,fp8,0,1.3220288276672363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,2,128,1,fp8,fp8,0,1.2385168075561523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,float16,0,1.4013263702392578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,fp8,0,1.4301744461059571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,4,128,1,fp8,fp8,0,1.2373552322387695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,float16,0,1.0566240310668946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,fp8,0,0.713804817199707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,12,128,1,fp8,fp8,0,0.6988368034362793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,float16,0,0.8260208129882812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,fp8,0,0.6804975986480712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,1,128,1,fp8,fp8,0,0.6736591815948486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,float16,0,0.783519983291626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,fp8,0,0.6781407833099365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,2,128,1,fp8,fp8,0,0.6750095844268799
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,fp8,0,0.6782127857208252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,4,128,1,fp8,fp8,0,0.6750671863555908
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,float16,0,0.4498720169067383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,fp8,0,0.402236795425415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,12,128,1,fp8,fp8,0,0.405185604095459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,float16,0,0.42690238952636717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,fp8,0,0.3909183979034424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,1,128,1,fp8,fp8,0,0.3883232116699219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,float16,0,0.4284639835357666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,fp8,0,0.3887808084487915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,2,128,1,fp8,fp8,0,0.39194560050964355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,float16,0,0.43356962203979493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,fp8,0,0.39167840480804444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,4,128,1,fp8,fp8,0,0.39453279972076416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,float16,0,0.7799952030181885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,float16,0,2.1148719787597656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,1,128,1,fp8,fp8,0,1.7277408599853517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,float16,0,2.046611213684082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,fp8,0,1.7497167587280273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,2,128,1,fp8,fp8,0,1.7667791366577148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,float16,0,2.4013439178466798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,fp8,0,1.7147455215454102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,fp8,0,1.7084447860717773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,4,128,1,fp8,fp8,0,1.7884239196777343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,fp8,0,0.9464480400085449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,float16,0,1.3668479919433594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,12,128,1,fp8,fp8,0,0.9456831932067871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,float16,0,1.0267807960510253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,fp8,0,0.9089232444763183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,float16,0,0.9875359535217285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,1,128,1,fp8,fp8,0,1.1147631645202636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,fp8,0,1.0782719612121583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,2,128,1,fp8,fp8,0,0.9059359550476074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,fp8,0,0.9128479957580566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,float16,0,1.1904047966003417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,float16,0,0.5802800178527832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,4,128,1,fp8,fp8,0,0.9185903549194336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,12,128,1,fp8,fp8,0,0.6907695770263672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,float16,0,0.5522047996520996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,fp8,0,0.5089231967926026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,1,128,1,fp8,fp8,0,0.5005280017852783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,float16,0,0.5569295883178711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,fp8,0,0.5070303916931153
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,2,128,1,fp8,fp8,0,0.49944319725036623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,float16,0,0.5614448070526123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,fp8,0,0.5069695949554444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,float16,0,0.3378335952758789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,fp8,0,0.30916800498962405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,12,128,1,fp8,fp8,0,0.31291038990020753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,float16,0,0.32467200756073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,fp8,0,0.3007328033447266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,1,128,1,fp8,fp8,0,0.29655520915985106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,float16,0,0.3208784103393555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,fp8,0,0.5291791915893554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,2,128,1,fp8,fp8,0,0.29365599155426025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,float16,0,0.3292815923690796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,fp8,0,0.2934479951858521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,4,128,1,fp8,fp8,0,0.29634718894958495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,4,128,1,fp8,fp8,0,0.49904961585998536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,float16,0,2.6265520095825194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,fp8,0,0.2974384069442749
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,1,128,1,fp8,fp8,0,2.233812713623047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,fp8,0,2.2456079483032227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,2,128,1,fp8,fp8,0,2.2479759216308595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,fp8,0,2.25787353515625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,float16,0,3.7815631866455077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,float16,0,1.7075599670410155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,fp8,0,2.260593605041504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,4,128,1,fp8,fp8,0,2.2562000274658205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,float16,0,2.948628807067871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,fp8,0,1.3675600051879884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,float16,0,1.2712512016296387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,fp8,0,1.36975040435791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,1,128,1,fp8,fp8,0,1.175819206237793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,fp8,0,1.1746895790100098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,2,128,1,fp8,fp8,0,1.157759952545166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,float16,0,1.3016655921936036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,12,128,1,fp8,fp8,0,1.2368528366088867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,fp8,0,1.1566847801208495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,float16,0,0.7174704074859619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,fp8,0,0.6560688018798828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,4,128,1,fp8,fp8,0,1.6940095901489258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,12,128,1,fp8,fp8,0,0.6591599941253662
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,float16,0,0.6770143985748291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,float16,0,1.7026639938354493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,fp8,0,0.6361279964447022
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,float16,0,0.7025856018066406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,fp8,0,0.6290319919586181
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,2,128,1,fp8,fp8,0,0.6324351787567138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,float16,0,0.699019193649292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,fp8,0,0.6299920082092285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,4,128,1,fp8,fp8,0,0.6278160095214844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,float16,0,0.4115327835083008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,fp8,0,0.3723664045333862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,12,128,1,fp8,fp8,0,0.3751456022262573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,float16,0,0.38469600677490234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,fp8,0,0.3540544033050537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,1,128,1,fp8,fp8,0,0.35266880989074706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,1,128,1,fp8,fp8,0,0.6261280059814454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,fp8,0,0.3565648078918457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,float16,0,0.3895967960357666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,2,128,1,fp8,fp8,0,0.3471184015274048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,float16,0,0.3913072109222412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,fp8,0,0.3545615911483765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,4,128,1,fp8,fp8,0,0.3476448059082031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,float16,0,0.23982720375061034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,12,128,1,fp8,fp8,0,0.21571199893951415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,fp8,0,0.21882240772247313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,float16,0,0.2280639886856079
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,fp8,0,0.20697760581970215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,1,128,1,fp8,fp8,0,0.2092736005783081
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,float16,0,0.2273360013961792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,fp8,0,0.2057919979095459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,2,128,1,fp8,fp8,0,0.21035680770874024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,float16,0,0.22980000972747802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,fp8,0,0.20627999305725098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,4,128,1,fp8,fp8,0,0.21010398864746094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,fp8,0,1.3551679611206056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,float16,0,1.4512080192565917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,1,128,1,fp8,fp8,0,1.368660831451416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,float16,0,1.433420753479004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,fp8,0,1.3531439781188965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,2,128,1,fp8,fp8,0,1.479748821258545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,float16,0,1.5069439888000489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,fp8,0,1.452000045776367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,float16,0,0.837065601348877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,fp8,0,0.7759984016418457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,4,128,1,fp8,fp8,0,1.3517135620117187
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,12,128,1,fp8,fp8,0,0.7845712184906006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,float16,0,0.7815792083740234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,1,128,1,fp8,fp8,0,0.7128943920135498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,fp8,0,0.7223231792449951
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,float16,0,0.7629951953887939
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,fp8,0,0.8181088447570801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,2,128,1,fp8,fp8,0,0.710532808303833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,float16,0,0.7982192039489746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,float16,0,0.4490272045135498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,fp8,0,0.713259220123291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,4,128,1,fp8,fp8,0,0.7186160087585449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,fp8,0,0.4231328010559082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,12,128,1,fp8,fp8,0,0.4195712089538574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,float16,0,0.4221343994140625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,fp8,0,0.39358398914337156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,1,128,1,fp8,fp8,0,0.39505441188812257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,float16,0,0.41248478889465334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,fp8,0,0.3979680061340332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,fp8,0,0.3876559972763062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,4,128,1,fp8,fp8,0,0.39284160137176516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,float16,0,0.25461440086364745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,fp8,0,0.23832321166992188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,12,128,1,fp8,fp8,0,0.23668959140777587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,float16,0,0.23588640689849855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,fp8,0,0.2228543996810913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,1,128,1,fp8,fp8,0,0.22384319305419922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,float16,0,0.23589119911193848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,fp8,0,0.22274401187896728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,2,128,1,fp8,fp8,0,0.222273588180542
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,float16,0,0.24367520809173585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,fp8,0,0.22258400917053223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,4,128,1,fp8,fp8,0,0.2230448007583618
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,float16,0,0.1570912003517151
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,fp8,0,0.14530080556869507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,float16,0,0.14769599437713624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,2,128,1,fp8,fp8,0,0.39484319686889646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,fp8,0,0.1401087999343872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,1,128,1,fp8,fp8,0,0.1413264036178589
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,float16,0,0.4301328182220459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,float16,0,0.14891519546508789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,fp8,0,0.1411471962928772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,2,128,1,fp8,fp8,0,0.14169440269470215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,fp8,0,0.1421023964881897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,4,128,1,fp8,fp8,0,0.14238400459289552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,float16,0,1.3716912269592285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,fp8,0,1.3413951873779297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,12,128,1,fp8,fp8,0,0.14756640195846557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,1,128,1,fp8,fp8,0,1.4414143562316895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,float16,0,1.3902416229248047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,float16,0,0.15016800165176392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,fp8,0,1.326360034942627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,2,128,1,fp8,fp8,0,1.3404527664184571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,fp8,0,1.3266783714294434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,4,128,1,fp8,fp8,0,1.3252623558044434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,float16,0,1.6944320678710938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,fp8,0,0.7625967979431152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,12,128,1,fp8,fp8,0,0.7603871822357178
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,float16,0,0.7961008071899414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,fp8,0,0.6939616203308105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,1,128,1,fp8,fp8,0,0.6910352230072021
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,float16,0,0.7407167911529541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,fp8,0,0.6889743804931641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,2,128,1,fp8,fp8,0,0.7423327922821045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,float16,0,0.7218480110168457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,float16,0,0.8096207618713379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,fp8,0,0.6942575931549072
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,4,128,1,fp8,fp8,0,0.7019743919372559
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,float16,0,0.43434720039367675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,fp8,0,0.4110879898071289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,12,128,1,fp8,fp8,0,0.4045584201812744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,fp8,0,0.38091840744018557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,1,128,1,fp8,fp8,0,0.3714224100112915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,float16,0,0.3910543918609619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,fp8,0,0.3709968090057373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,2,128,1,fp8,fp8,0,0.3759040117263794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,float16,0,0.39030559062957765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,fp8,0,0.37437920570373534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,4,128,1,fp8,fp8,0,0.3694783926010132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,float16,0,0.24116160869598388
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,fp8,0,0.22470560073852539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,12,128,1,fp8,fp8,0,0.22728641033172609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,fp8,0,0.2072751998901367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,float16,0,0.21513919830322265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,1,128,1,fp8,fp8,0,0.20911040306091308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,fp8,0,0.20886080265045165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,float16,0,0.2143712043762207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,2,128,1,fp8,fp8,0,0.20992960929870605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,float16,0,0.3910432100296021
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,fp8,0,0.20796959400177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,4,128,1,fp8,fp8,0,0.21079840660095214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,fp8,0,0.13052799701690673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,float16,0,0.12526079416275024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,fp8,0,0.12136640548706054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,1,128,1,fp8,fp8,0,0.12118560075759888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,float16,0,0.12561440467834473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,fp8,0,0.12150559425354004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,2,128,1,fp8,fp8,0,0.12103840112686157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,float16,0,0.12847520112991334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,fp8,0,0.12157119512557983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,4,128,1,fp8,fp8,0,0.12163519859313965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,float16,0,0.08444960117340088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,fp8,0,0.08229920268058777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,12,128,1,fp8,fp8,0,0.0825215995311737
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,float16,0,0.2207360029220581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,float16,0,0.08052480220794678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,float16,0,0.1419376015663147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,fp8,0,0.07846400141716003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,1,128,1,fp8,fp8,0,0.0783728003501892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,float16,0,0.08214880228042602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,fp8,0,0.07912319898605347
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,2,128,1,fp8,fp8,0,0.07805439829826355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,float16,0,0.08348320126533508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,4,128,1,fp8,fp8,0,0.07750399708747864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,float16,0,0.8449983596801758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,fp8,0,0.8338255882263184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,1,128,1,fp8,fp8,0,0.8461695671081543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,float16,0,0.8224528312683106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,fp8,0,0.8424127578735352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,2,128,1,fp8,fp8,0,1.045188808441162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,12,128,1,fp8,fp8,0,0.1335055947303772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,fp8,0,0.08018400073051453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,float16,0,0.8651311874389649
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,fp8,0,0.8323311805725098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,fp8,0,0.49672160148620603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,4,128,1,fp8,fp8,0,0.8393600463867188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,12,128,1,fp8,fp8,0,0.49058079719543457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,float16,0,0.449019193649292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,fp8,0,0.4478816032409668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,1,128,1,fp8,fp8,0,0.43895840644836426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,float16,0,0.4479504108428955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,fp8,0,0.44698081016540525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,2,128,1,fp8,fp8,0,0.4378767967224121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,float16,0,0.46099200248718264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,fp8,0,0.4433599948883057
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,4,128,1,fp8,fp8,0,0.43613600730895996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,float16,0,0.2797231912612915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,fp8,0,0.26894240379333495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,fp8,0,0.2424367904663086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,1,128,1,fp8,fp8,0,0.24207839965820313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,float16,0,0.508350419998169
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,float16,0,0.24578559398651123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,fp8,0,0.239847993850708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,2,128,1,fp8,fp8,0,0.2419071912765503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,float16,0,0.24915680885314942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,fp8,0,0.24029920101165772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,4,128,1,fp8,fp8,0,0.23878560066223145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,float16,0,0.15805439949035643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,fp8,0,0.14948159456253052
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,12,128,1,fp8,fp8,0,0.1509711980819702
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,fp8,0,0.13593920469284057
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,1,128,1,fp8,fp8,0,0.13646880388259888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,float16,0,0.2498687982559204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,float16,0,0.1398848056793213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,fp8,0,0.13563519716262817
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,float16,0,0.14229439496994017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,2,128,1,fp8,fp8,0,0.1376144051551819
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,fp8,0,0.1365615963935852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,4,128,1,fp8,fp8,0,0.13590719699859619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,float16,0,0.09567040205001831
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,fp8,0,0.09195039868354797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,12,128,1,fp8,fp8,0,0.09083840250968933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,float16,0,0.08653600215911865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,fp8,0,0.08441439867019654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,1,128,1,fp8,fp8,0,0.08463839888572693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,float16,0,0.08689759969711304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,fp8,0,0.0845408022403717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,2,128,1,fp8,fp8,0,0.08451039791107177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,float16,0,0.08853600025177003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,12,128,1,fp8,fp8,0,0.2647311925888062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,fp8,0,0.08476639986038208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,4,128,1,fp8,fp8,0,0.08343039751052857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,float16,0,0.05380799770355225
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,fp8,0,0.053504002094268796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,12,128,1,fp8,fp8,0,0.053964799642562865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,float16,0,0.05142880082130432
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,fp8,0,0.05161280035972595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,1,128,1,fp8,fp8,0,0.050020802021026614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,float16,0,0.05166879892349243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,2,128,1,fp8,fp8,0,0.051472002267837526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,float16,0,0.052215999364852904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,fp8,0,0.05125759840011597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,4,128,1,fp8,fp8,0,0.051579201221466066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,float16,0,0.13579039573669432
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,float16,0,0.8394831657409668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,fp8,0,0.8675024032592773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,1,128,1,fp8,fp8,0,0.8744256019592285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,float16,0,0.8158720016479493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,fp8,0,0.050470399856567386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,fp8,0,0.8747407913208007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,2,128,1,fp8,fp8,0,0.983243179321289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,float16,0,0.9675583839416504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,4,128,1,fp8,fp8,0,0.8744064331054687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,float16,0,0.5337776184082031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,fp8,0,0.5663648128509522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,float16,0,0.4448063850402832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,fp8,0,0.4668288230895996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,1,128,1,fp8,fp8,0,0.4694528102874756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,float16,0,0.4302815914154053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,fp8,0,0.46851358413696287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,2,128,1,fp8,fp8,0,0.4503488063812256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,float16,0,0.4577919960021973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,fp8,0,0.4492976188659668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,float16,0,0.2780911922454834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,4,128,1,fp8,fp8,0,0.45285601615905763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,fp8,0,0.8640144348144532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,fp8,0,0.2787440061569214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,float16,0,0.23283040523529053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,fp8,0,0.2423935890197754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,1,128,1,fp8,fp8,0,0.24342238903045654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,12,128,1,fp8,fp8,0,0.5286159992218018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,fp8,0,0.24081599712371826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,2,128,1,fp8,fp8,0,0.24432640075683593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,float16,0,0.24020800590515137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,fp8,0,0.24368638992309571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,4,128,1,fp8,fp8,0,0.23964159488677977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,float16,0,0.15406559705734252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,fp8,0,0.15180480480194092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,12,128,1,fp8,fp8,0,0.15294560194015502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,fp8,0,0.13255200386047364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,1,128,1,fp8,fp8,0,0.13297439813613893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,float16,0,0.13203359842300416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,fp8,0,0.13417600393295287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,2,128,1,fp8,fp8,0,0.13351839780807495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,12,128,1,fp8,fp8,0,0.2798768043518066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,float16,0,0.13732000589370727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,fp8,0,0.13455040454864503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,4,128,1,fp8,fp8,0,0.13346560001373292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,fp8,0,0.09088479876518249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,float16,0,0.23255999088287355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,12,128,1,fp8,fp8,0,0.08829439878463745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,float16,0,0.07643200159072876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,fp8,0,0.07959359884262085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,1,128,1,fp8,fp8,0,0.07818719744682312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,float16,0,0.07738720178604126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,fp8,0,0.07824640274047852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,2,128,1,fp8,fp8,0,0.07923679947853088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,float16,0,0.07833279967308045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,fp8,0,0.07900320291519165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,4,128,1,fp8,fp8,0,0.07811040282249451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,float16,0,0.055264002084732054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,fp8,0,0.05610880255699158
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,float16,0,0.12877440452575684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,12,128,1,fp8,fp8,0,0.05742239952087402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,fp8,0,0.051583999395370485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,1,128,1,fp8,fp8,0,0.0526528000831604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,float16,0,0.05170239806175232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,fp8,0,0.05325279831886291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,2,128,1,fp8,fp8,0,0.05164480209350586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,float16,0,0.05354239940643311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,fp8,0,0.051734399795532224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,4,128,1,fp8,fp8,0,0.05287359952926636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,float16,0,0.039124798774719236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,fp8,0,0.04066559970378876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,12,128,1,fp8,fp8,0,0.039129599928855896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,float16,0,0.03719359934329987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,fp8,0,0.037196800112724304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,1,128,1,fp8,fp8,0,0.03796960115432739
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,float16,0,0.037092798948287965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,fp8,0,0.03718400001525879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,2,128,1,fp8,fp8,0,0.03723680078983307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,float16,0,0.037161600589752194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,fp8,0,0.037169599533081056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,4,128,1,fp8,fp8,0,0.0371071994304657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,float16,0,0.5249855995178223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,float16,0,0.05157439708709717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,fp8,0,0.5694623947143554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,1,128,1,fp8,fp8,0,0.5731520175933837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,float16,0,0.5206639766693115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,fp8,0,0.5700448036193848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,2,128,1,fp8,fp8,0,0.5689695835113525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,float16,0,0.09063199758529664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,float16,0,0.5523151874542236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,fp8,0,0.5684639930725097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,float16,0,0.3504928112030029
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,fp8,0,0.35341439247131345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,4,128,1,fp8,fp8,0,0.5716032028198242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,float16,0,0.2782432079315186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,12,128,1,fp8,fp8,0,0.35513598918914796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,fp8,0,0.30391359329223633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,float16,0,0.2802720069885254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,fp8,0,0.30109438896179197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,float16,0,0.2932687997817993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,fp8,0,0.3016400098800659
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,float16,0,0.1903663992881775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,4,128,1,fp8,fp8,0,0.30202560424804686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,fp8,0,0.19096319675445556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,12,128,1,fp8,fp8,0,0.18877120018005372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,float16,0,0.15527199506759642
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,fp8,0,0.16111520528793336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,float16,0,0.15065120458602904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,1,128,1,fp8,fp8,0,0.1635167956352234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,fp8,0,0.16064640283584594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,2,128,1,fp8,fp8,0,0.1633631944656372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,float16,0,0.15993599891662597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,fp8,0,0.16346399784088134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,4,128,1,fp8,fp8,0,0.160806405544281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,float16,0,0.1056399941444397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,fp8,0,0.1051967978477478
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,12,128,1,fp8,fp8,0,0.10642880201339722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,float16,0,0.08544319868087769
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,fp8,0,0.09085279703140259
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,float16,0,0.08758400082588196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,fp8,0,0.0908415973186493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,2,128,1,fp8,fp8,0,0.3020096063613892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,2,128,1,fp8,fp8,0,0.09113439917564392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,float16,0,0.09161440134048462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,fp8,0,0.09231839776039123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,float16,0,0.06329280138015747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,fp8,0,0.06225600242614746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,12,128,1,fp8,fp8,0,0.062033599615097045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,float16,0,0.05415999889373779
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,fp8,0,0.05567359924316406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,1,128,1,fp8,fp8,0,0.055593597888946536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,float16,0,0.05404319763183594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,fp8,0,0.055731201171875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,2,128,1,fp8,fp8,0,0.05564000010490418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,float16,0,0.0559503972530365
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,1,128,1,fp8,fp8,0,0.08983680009841918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,fp8,0,0.055619198083877566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,4,128,1,fp8,fp8,0,0.05562719702720642
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,1,128,1,fp8,fp8,0,0.3009903907775879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,float16,0,0.03517920076847077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,12,128,1,fp8,fp8,0,0.03712159991264343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,float16,0,0.033004799485206605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,fp8,0,0.032996800541877744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,float16,0,0.032950401306152344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,fp8,0,0.03299039900302887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,2,128,1,fp8,fp8,0,0.03309119939804077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,float16,0,0.033046400547027587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,fp8,0,0.033020800352096556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,4,128,1,fp8,fp8,0,0.032950401306152344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,float16,0,0.032969599962234496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,fp8,0,0.03292959928512573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,12,128,1,fp8,fp8,0,0.03303360044956207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,float16,0,0.030904000997543334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,fp8,0,0.030929601192474364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,1,128,1,fp8,fp8,0,0.030929601192474364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,float16,0,0.03094240128993988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,fp8,0,0.03091999888420105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,2,128,1,fp8,fp8,0,0.030908799171447753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,float16,0,0.031062400341033934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,fp8,0,0.031006398797035217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,4,128,1,fp8,fp8,0,0.030935999751091004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,fp8,0,0.03712159991264343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,1,128,1,fp8,fp8,0,0.03433600068092346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,float16,0,0.557431983947754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,fp8,0,0.634611177444458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,4,128,1,fp8,fp8,0,0.09064480066299438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,float16,0,0.5502160072326661
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,1,128,1,fp8,fp8,0,0.6384047985076904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,fp8,0,0.6313807964324951
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,2,128,1,fp8,fp8,0,0.6376016139984131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,float16,0,0.5893216133117676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,fp8,0,0.6349520206451416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,4,128,1,fp8,fp8,0,0.6302080154418945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,fp8,0,0.4000351905822754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,12,128,1,fp8,fp8,0,0.40248799324035645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,fp8,0,0.3300816059112549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,1,128,1,fp8,fp8,0,0.33361599445343015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,float16,0,0.288428807258606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,fp8,0,0.32873120307922366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,2,128,1,fp8,fp8,0,0.32879199981689455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,float16,0,0.3095360040664673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,fp8,0,0.32775518894195554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,4,128,1,fp8,fp8,0,0.3276688098907471
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,float16,0,0.39168639183044435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,float16,0,0.20424480438232423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,fp8,0,0.21323680877685547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,float16,0,0.29153120517730713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,12,128,1,fp8,fp8,0,0.21122078895568847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,fp8,0,0.17481119632720948
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,1,128,1,fp8,fp8,0,0.1754480004310608
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,float16,0,0.15714880228042602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,fp8,0,0.1750383973121643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,2,128,1,fp8,fp8,0,0.1742591977119446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,float16,0,0.165937602519989
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,fp8,0,0.17414079904556273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,4,128,1,fp8,fp8,0,0.17401599884033203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,float16,0,0.11043839454650879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,fp8,0,0.11426559686660767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,12,128,1,fp8,fp8,0,0.11488959789276124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,float16,0,0.08710399866104127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,fp8,0,0.09483680129051208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,1,128,1,fp8,fp8,0,0.09427040219306945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,float16,0,0.08816800117492676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,fp8,0,0.09472320079803467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,2,128,1,fp8,fp8,0,0.09503679871559143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,float16,0,0.0928928017616272
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,fp8,0,0.09552800059318542
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,4,128,1,fp8,fp8,0,0.09601119756698609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,float16,0,0.06398879885673522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,fp8,0,0.06592159867286682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,12,128,1,fp8,fp8,0,0.06565759778022766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,float16,0,0.05291200280189514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,fp8,0,0.05631840229034424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,1,128,1,fp8,fp8,0,0.05660799741744995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,float16,0,0.051579201221466066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,fp8,0,0.05643200278282166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,2,128,1,fp8,fp8,0,0.055619198083877566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,float16,0,0.05492479801177978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,fp8,0,0.05662879943847656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,float16,0,0.15857759714126587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,fp8,0,0.042182400822639465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,float16,0,0.036831998825073244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,fp8,0,0.037544000148773196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,1,128,1,fp8,fp8,0,0.037643200159072875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,float16,0,0.036206400394439696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,fp8,0,0.03739840090274811
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,2,128,1,fp8,fp8,0,0.0372624009847641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,float16,0,0.037195199728012086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,fp8,0,0.03765920102596283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,4,128,1,fp8,fp8,0,0.037771201133728026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,float16,0,0.026947200298309326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,fp8,0,0.02902719974517822
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,12,128,1,fp8,fp8,0,0.029003199934959412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,float16,0,0.0257423996925354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,fp8,0,0.026876801252365114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,1,128,1,fp8,fp8,0,0.02699359953403473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,float16,0,0.02492000013589859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,fp8,0,0.02691679894924164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,2,128,1,fp8,fp8,0,0.026998400688171387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,float16,0,0.0389055997133255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,float16,0,0.026795199513435362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,fp8,0,0.026843199133872987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,4,128,1,fp8,fp8,0,0.026947200298309326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,12,128,1,fp8,fp8,0,0.042238399386405945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,fp8,0,0.026764801144599913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,12,128,1,fp8,fp8,0,0.02685439884662628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,float16,0,0.02473440021276474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,1,128,1,fp8,fp8,0,0.024851199984550477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,fp8,0,0.025358399748802184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,float16,0,0.02486560046672821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,2,128,1,fp8,fp8,0,0.02484000027179718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,float16,0,0.02489600032567978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,fp8,0,0.024934400618076325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,4,128,1,fp8,fp8,0,0.02489120066165924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,1,128,1,float16,float16,0,0.4291232109069824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,1,128,1,float16,fp8,0,0.5190095901489258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,4,128,1,fp8,fp8,0,0.05639359951019287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,1,128,1,fp8,fp8,0,0.5190383911132812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,float16,0,0.024798400700092316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,2,128,1,float16,float16,0,0.41905918121337893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,2,128,1,float16,fp8,0,0.5164207935333252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,2,128,1,fp8,fp8,0,0.5130640029907226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,fp8,0,0.025279998779296875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,4,128,1,float16,float16,0,0.45494718551635743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,4,128,1,float16,fp8,0,0.5160736083984375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,4,128,1,fp8,fp8,0,0.5117968082427978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,12,128,1,float16,fp8,0,0.3374000072479248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,12,128,1,fp8,fp8,0,0.33947200775146485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,1,128,1,float16,float16,0,0.22157280445098876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,1,128,1,fp8,fp8,0,0.2700592041015625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,2,128,1,float16,fp8,0,0.26938719749450685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,2,128,1,fp8,fp8,0,0.269598388671875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,4,128,1,float16,float16,0,0.2410736083984375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,4,128,1,float16,fp8,0,0.26783199310302735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,4,128,1,fp8,fp8,0,0.2677599906921387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,12,128,1,float16,float16,0,0.16864160299301148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,12,128,1,float16,fp8,0,0.17822240591049193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,12,128,1,float16,float16,0,0.32065439224243164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,12,128,1,fp8,fp8,0,0.17880799770355224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,1,128,1,float16,float16,0,0.12187520265579224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,1,128,1,float16,fp8,0,0.26725599765777586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,1,128,1,float16,fp8,0,0.14299360513687134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,1,128,1,fp8,fp8,0,0.14175039529800415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,2,128,1,float16,float16,0,0.12135679721832275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,2,128,1,float16,fp8,0,0.1408992052078247
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,2,128,1,fp8,fp8,0,0.14300320148468018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,4,128,1,float16,float16,0,0.12886559963226318
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,4,128,1,float16,fp8,0,0.14235199689865113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,12,128,1,float16,float16,0,0.091430401802063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,12,128,1,float16,fp8,0,0.09655359983444214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,12,128,1,fp8,fp8,0,0.09694399833679199
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,1,128,1,float16,float16,0,0.06796159744262695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,1,128,1,float16,fp8,0,0.07698559761047363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,1,128,1,fp8,fp8,0,0.07630720138549804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,2,128,1,float16,float16,0,0.06801760196685791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,2,128,1,float16,fp8,0,0.07692959904670715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,2,128,1,fp8,fp8,0,0.07688959836959838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,4,128,1,float16,float16,0,0.07295039892196656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,2,128,1,float16,float16,0,0.22092959880828858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,4,128,1,fp8,fp8,0,0.0765824019908905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,12,128,1,float16,float16,0,0.0517632007598877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,12,128,1,float16,fp8,0,0.05435519814491272
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,12,128,1,fp8,fp8,0,0.05418239831924439
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,1,128,1,float16,float16,0,0.0398144006729126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,4,128,1,fp8,fp8,0,0.14099520444869995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,1,128,1,float16,fp8,0,0.04542239904403687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,2,128,1,float16,float16,0,0.040847998857498166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,2,128,1,fp8,fp8,0,0.04524799883365631
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,2,128,1,float16,fp8,0,0.04556640088558197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,4,128,1,float16,float16,0,0.04320639967918396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,4,128,1,float16,fp8,0,0.04583680033683777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,12,128,1,float16,float16,0,0.030943998694419862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,12,128,1,float16,fp8,0,0.03517760038375854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,12,128,1,fp8,fp8,0,0.03519200086593628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,1,128,1,float16,float16,0,0.028880000114440918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,1,128,1,float16,fp8,0,0.031071999669075014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,1,128,1,fp8,fp8,0,0.031071999669075014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,2,128,1,float16,float16,0,0.028881600499153136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,4,128,1,float16,fp8,0,0.07714400291442872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,2,128,1,fp8,fp8,0,0.030931198596954347
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,4,128,1,float16,float16,0,0.029028800129890443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,4,128,1,float16,fp8,0,0.031017601490020752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,4,128,1,fp8,fp8,0,0.030988800525665283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,12,128,1,float16,float16,0,0.020873600244522096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,12,128,1,float16,fp8,0,0.022961600124835967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,12,128,1,fp8,fp8,0,0.022859199345111846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,1,128,1,float16,float16,0,0.020737600326538087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,1,128,1,fp8,fp8,0,0.0456063985824585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,1,128,1,float16,fp8,0,0.020843200385570526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,1,128,1,fp8,fp8,0,0.020729599893093108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,2,128,1,float16,float16,0,0.020710399746894835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,2,128,1,float16,fp8,0,0.02072480022907257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,4,128,1,fp8,fp8,0,0.045440000295639035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,4,128,1,float16,float16,0,0.02072480022907257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,4,128,1,float16,fp8,0,0.020737600326538087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,4,128,1,fp8,fp8,0,0.02069920003414154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,12,128,1,float16,float16,0,0.020500800013542174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,12,128,1,float16,fp8,0,0.020742399990558623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,1,128,1,float16,float16,0,0.018750399351119995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,1,128,1,float16,fp8,0,0.018747200071811677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,1,128,1,fp8,fp8,0,0.018713599443435668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,2,128,1,float16,float16,0,0.018753600120544434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,2,128,1,float16,fp8,0,0.018743999302387238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,2,128,1,fp8,fp8,0,0.018795199692249298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,4,128,1,float16,float16,0,0.018670399487018586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,4,128,1,float16,fp8,0,0.01873279958963394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,4,128,1,fp8,fp8,0,0.018697600066661834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,12,128,1,float16,float16,0,0.01878879964351654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,12,128,1,float16,fp8,0,0.018695999681949616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,12,128,1,fp8,fp8,0,0.018724800646305086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,1,128,1,float16,float16,0,0.018646399676799773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,1,128,1,float16,fp8,0,0.01884319931268692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,1,128,1,fp8,fp8,0,0.01870879977941513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,2,128,1,fp8,fp8,0,0.02080159932374954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,2,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,2,128,1,float16,fp8,0,0.018780800700187682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,2,128,1,fp8,fp8,0,0.018780800700187682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,4,128,1,float16,float16,0,0.01865600049495697
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,4,128,1,float16,fp8,0,0.018723200261592864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,4,128,1,fp8,fp8,0,0.018812799453735353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,12,128,1,fp8,fp8,0,0.02069759964942932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,1,128,1,float16,float16,0,0.18352160453796387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,1,128,1,fp8,fp8,0,0.23435680866241454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,1,128,1,float16,fp8,0,0.23613920211791992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,2,128,1,float16,float16,0,0.18316960334777832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,2,128,1,float16,fp8,0,0.2333712100982666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,2,128,1,fp8,fp8,0,0.23582720756530762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,2,128,1,float16,fp8,0,0.031043198704719544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,4,128,1,float16,fp8,0,0.2322240114212036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,4,128,1,fp8,fp8,0,0.23764638900756835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,12,128,1,float16,float16,0,0.1493183970451355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,12,128,1,fp8,fp8,0,0.16167360544204712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,1,128,1,float16,float16,0,0.10098880529403687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,1,128,1,float16,fp8,0,0.12583520412445068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,1,128,1,fp8,fp8,0,0.12538399696350097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,2,128,1,float16,float16,0,0.10243840217590332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,2,128,1,float16,fp8,0,0.1255071997642517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,2,128,1,fp8,fp8,0,0.12598719596862792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,4,128,1,float16,float16,0,0.11147359609603882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,4,128,1,float16,fp8,0,0.1253615975379944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,4,128,1,fp8,fp8,0,0.12528159618377685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,12,128,1,float16,float16,0,0.08302080035209655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,12,128,1,float16,fp8,0,0.08850880265235901
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,12,128,1,fp8,fp8,0,0.08845279812812805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,1,128,1,float16,float16,0,0.05906559824943543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,4,128,1,float16,float16,0,0.2020944118499756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,1,128,1,float16,fp8,0,0.06828160285949707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,1,128,1,fp8,fp8,0,0.06800159811973572
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,2,128,1,float16,float16,0,0.05945119857788086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,2,128,1,float16,fp8,0,0.06883360147476196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,2,128,1,fp8,fp8,0,0.06954240202903747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,4,128,1,float16,float16,0,0.06354719996452332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,4,128,1,float16,fp8,0,0.06999679803848266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,4,128,1,fp8,fp8,0,0.0699567973613739
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,12,128,1,float16,float16,0,0.04669440090656281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,12,128,1,float16,fp8,0,0.04785279929637909
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,12,128,1,fp8,fp8,0,0.04946880042552948
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,1,128,1,float16,float16,0,0.03307519853115082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,1,128,1,float16,fp8,0,0.03948799967765808
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,1,128,1,fp8,fp8,0,0.03916960060596466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,2,128,1,float16,float16,0,0.03329919874668121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,2,128,1,float16,fp8,0,0.03919999897480011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,2,128,1,fp8,fp8,0,0.12309600114822387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,4,128,1,float16,fp8,0,0.03925279974937439
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,4,128,1,fp8,fp8,0,0.03947519958019256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,12,128,1,float16,float16,0,0.02705279886722565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,12,128,1,float16,fp8,0,0.032795199751853944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,1,128,1,float16,float16,0,0.024953599274158477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,12,128,1,fp8,fp8,0,0.03329119980335236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,1,128,1,float16,fp8,0,0.028601598739624024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,1,128,1,fp8,fp8,0,0.029046401381492615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,12,128,1,float16,fp8,0,0.15970879793167114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,2,128,1,float16,fp8,0,0.02885279953479767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,2,128,1,fp8,fp8,0,0.026977598667144775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,4,128,1,float16,float16,0,0.026907199621200563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,4,128,1,float16,fp8,0,0.026897600293159483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,4,128,1,fp8,fp8,0,0.02879520058631897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,12,128,1,float16,float16,0,0.01865919977426529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,12,128,1,float16,fp8,0,0.02091200053691864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,12,128,1,fp8,fp8,0,0.020947200059890748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,1,128,1,float16,float16,0,0.016816000640392303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,1,128,1,float16,fp8,0,0.01873600035905838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,1,128,1,fp8,fp8,0,0.018716800212860107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,2,128,1,float16,fp8,0,0.018588800728321076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,2,128,1,fp8,fp8,0,0.01868479996919632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,4,128,1,float16,float16,0,0.01796319931745529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,4,128,1,float16,fp8,0,0.018825599551200868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,4,128,1,float16,float16,0,0.035220798850059507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,12,128,1,float16,float16,0,0.016735999286174773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,12,128,1,float16,fp8,0,0.018641600012779237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,12,128,1,fp8,fp8,0,0.01871519982814789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,1,128,1,float16,float16,0,0.016590400040149687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,1,128,1,float16,fp8,0,0.016648000478744505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,1,128,1,fp8,fp8,0,0.01661919951438904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,2,128,1,float16,float16,0,0.016748799383640288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,2,128,1,float16,fp8,0,0.01669279932975769
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,2,128,1,float16,float16,0,0.02498079985380173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,2,128,1,fp8,fp8,0,0.016894400119781494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,4,128,1,float16,float16,0,0.01666080057621002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,4,128,1,float16,fp8,0,0.016655999422073364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,12,128,1,float16,float16,0,0.016606399416923524
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,12,128,1,float16,fp8,0,0.016726399958133697
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,12,128,1,fp8,fp8,0,0.01661760061979294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,2,128,1,float16,float16,0,0.01658080071210861
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,1,128,1,float16,fp8,0,0.01656640022993088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,1,128,1,fp8,fp8,0,0.01655679941177368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,2,128,1,float16,float16,0,0.016571199893951415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,2,128,1,float16,fp8,0,0.01669439971446991
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,2,128,1,fp8,fp8,0,0.016551999747753142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,4,128,1,float16,float16,0,0.017319999635219574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,4,128,1,float16,fp8,0,0.016579200327396394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,4,128,1,fp8,fp8,0,0.01659359931945801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,12,128,1,float16,float16,0,0.016598400473594666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,12,128,1,float16,fp8,0,0.016547200083732606
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,12,128,1,fp8,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,1,128,1,float16,float16,0,0.016659200191497803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,1,128,1,float16,fp8,0,0.01658399999141693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,1,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,2,128,1,float16,float16,0,0.01652960032224655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,2,128,1,float16,fp8,0,0.015851199626922607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,2,128,1,fp8,fp8,0,0.016497600078582763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,4,128,1,float16,float16,0,0.015756799280643462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,4,128,1,float16,fp8,0,0.016395199298858642
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,4,128,1,fp8,fp8,0,0.016283200681209566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,4,128,1,fp8,fp8,0,0.01674560010433197
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,1,128,1,float16,float16,0,0.1122320055961609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,1,128,1,float16,fp8,0,0.1384511947631836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,1,128,1,fp8,fp8,0,0.13683040142059327
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,2,128,1,float16,float16,0,0.1145792007446289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,2,128,1,float16,fp8,0,0.136244797706604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,2,128,1,fp8,fp8,0,0.1379423975944519
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,4,128,1,float16,float16,0,0.1215824007987976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,4,128,1,float16,fp8,0,0.13768960237503053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,4,128,1,fp8,fp8,0,0.13581119775772094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,4,128,1,fp8,fp8,0,0.01873439997434616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,12,128,1,float16,float16,0,0.08468000292778015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,12,128,1,float16,fp8,0,0.0923807978630066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,1,128,1,float16,float16,0,0.06391839981079102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,12,128,1,fp8,fp8,0,0.0935375988483429
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,1,128,1,float16,fp8,0,0.0736303985118866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,1,128,1,fp8,fp8,0,0.07408000230789184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,2,128,1,float16,float16,0,0.06375359892845153
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,2,128,1,fp8,fp8,0,0.0740880012512207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,4,128,1,float16,float16,0,0.06802719831466675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,4,128,1,float16,fp8,0,0.07409440279006958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,4,128,1,fp8,fp8,0,0.07440159916877746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,12,128,1,float16,float16,0,0.047672000527381894
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,12,128,1,float16,fp8,0,0.051969599723815915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,12,128,1,fp8,fp8,0,0.051446402072906496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,1,128,1,float16,float16,0,0.03742560148239136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,1,128,1,float16,fp8,0,0.0423552006483078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,1,128,1,float16,float16,0,0.01472959965467453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,1,128,1,fp8,fp8,0,0.042798399925231934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,2,128,1,float16,float16,0,0.03651039898395538
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,2,128,1,float16,fp8,0,0.043038401007652285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,2,128,1,fp8,fp8,0,0.04255039989948273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,4,128,1,float16,float16,0,0.03930400013923645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,4,128,1,float16,fp8,0,0.04338400065898895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,4,128,1,fp8,fp8,0,0.043244799971580504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,12,128,1,float16,float16,0,0.026976001262664796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,12,128,1,float16,fp8,0,0.0311024010181427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,12,128,1,fp8,fp8,0,0.03094559907913208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,1,128,1,float16,float16,0,0.022998400032520294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,1,128,1,float16,fp8,0,0.02616479992866516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,1,128,1,fp8,fp8,0,0.026913601160049438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,2,128,1,float16,float16,0,0.022867199778556824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,2,128,1,float16,fp8,0,0.026939201354980468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,2,128,1,fp8,fp8,0,0.02672159969806671
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,4,128,1,float16,float16,0,0.024534399807453155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,4,128,1,fp8,fp8,0,0.026855999231338502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,12,128,1,float16,float16,0,0.018774400651454925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,12,128,1,float16,fp8,0,0.020979200303554536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,12,128,1,fp8,fp8,0,0.020983999967575072
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,1,128,1,float16,float16,0,0.016787199676036833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,1,128,1,float16,fp8,0,0.018764799833297728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,2,128,1,float16,float16,0,0.01685120016336441
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,1,128,1,fp8,fp8,0,0.019126400351524353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,2,128,1,float16,fp8,0,0.018673600256443025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,2,128,1,fp8,fp8,0,0.019308799505233766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,4,128,1,float16,float16,0,0.01857600063085556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,4,128,1,float16,fp8,0,0.019046400487422944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,4,128,1,fp8,fp8,0,0.018886399269104005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,12,128,1,float16,float16,0,0.014707200229167938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,12,128,1,fp8,fp8,0,0.014793600142002105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,1,128,1,float16,float16,0,0.01284320056438446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,1,128,1,float16,fp8,0,0.012936000525951386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,2,128,1,float16,fp8,0,0.07416319847106934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,1,128,1,fp8,fp8,0,0.012835200130939483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,2,128,1,float16,float16,0,0.012643200159072877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,2,128,1,float16,fp8,0,0.013209599256515502
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,2,128,1,fp8,fp8,0,0.014350399374961853
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,4,128,1,float16,float16,0,0.012742400169372559
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,4,128,1,float16,fp8,0,0.014281600713729858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,4,128,1,fp8,fp8,0,0.014686399698257446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,12,128,1,float16,float16,0,0.012753599882125854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,12,128,1,float16,fp8,0,0.01268479973077774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,12,128,1,fp8,fp8,0,0.012777599692344665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,1,128,1,float16,float16,0,0.012630400061607362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,1,128,1,float16,fp8,0,0.012664000689983367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,1,128,1,fp8,fp8,0,0.01268800050020218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,2,128,1,float16,float16,0,0.012486399710178375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,2,128,1,float16,fp8,0,0.012641599774360657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,2,128,1,fp8,fp8,0,0.012702399492263794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,4,128,1,float16,float16,0,0.012630400061607362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,4,128,1,float16,fp8,0,0.012783999741077422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,4,128,1,fp8,fp8,0,0.012624000012874604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,12,128,1,float16,float16,0,0.012622399628162384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,12,128,1,float16,fp8,0,0.012654399871826172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,12,128,1,fp8,fp8,0,0.012683199346065521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,1,128,1,float16,float16,0,0.012656000256538392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,1,128,1,fp8,fp8,0,0.012460800260305405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,1,128,1,float16,fp8,0,0.012574400007724761
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,2,128,1,float16,float16,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,2,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,2,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,4,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,4,128,1,float16,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,12,128,1,float16,fp8,0,0.014617599546909332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,4,128,1,fp8,fp8,0,0.012639999389648438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,12,128,1,float16,fp8,0,0.012683199346065521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,12,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,1,128,1,float16,float16,0,0.012596799433231354
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,1,128,1,float16,fp8,0,0.012577599287033081
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,1,128,1,fp8,fp8,0,0.012588800489902496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,2,128,1,float16,float16,0,0.01244639977812767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,2,128,1,float16,fp8,0,0.012561599910259246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,2,128,1,fp8,fp8,0,0.012598399817943574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,4,128,1,float16,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,4,128,1,fp8,fp8,0,0.01247519999742508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,1,128,1,float16,float16,0,0.08758400082588196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,1,128,1,float16,fp8,0,0.09741600155830384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,1,128,1,fp8,fp8,0,0.09867200255393982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,2,128,1,float16,float16,0,0.0881936013698578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,2,128,1,float16,fp8,0,0.09863839745521545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,2,128,1,fp8,fp8,0,0.09860159754753113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,4,128,1,float16,float16,0,0.09261919856071472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,4,128,1,float16,fp8,0,0.09872480034828186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,12,128,1,float16,float16,0,0.01249919980764389
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,4,128,1,fp8,fp8,0,0.09741600155830384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,12,128,1,float16,float16,0,0.057796800136566163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,4,128,1,float16,float16,0,0.012566399574279786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,12,128,1,float16,fp8,0,0.0619488000869751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,4,128,1,float16,fp8,0,0.026654401421546937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,12,128,1,fp8,fp8,0,0.06241440176963806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,1,128,1,float16,float16,0,0.04736959934234619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,1,128,1,fp8,fp8,0,0.053534400463104245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,2,128,1,float16,float16,0,0.047414401173591615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,2,128,1,fp8,fp8,0,0.053465598821640016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,4,128,1,float16,float16,0,0.049609598517417905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,4,128,1,float16,fp8,0,0.05351999998092651
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,4,128,1,fp8,fp8,0,0.05355200171470642
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,12,128,1,float16,float16,0,0.03329119980335236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,12,128,1,float16,fp8,0,0.03704800009727478
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,12,128,1,fp8,fp8,0,0.036950400471687316
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,1,128,1,float16,float16,0,0.028934401273727418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,1,128,1,float16,fp8,0,0.03115679919719696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,1,128,1,fp8,fp8,0,0.031007999181747438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,2,128,1,float16,float16,0,0.028947201371192933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,2,128,1,float16,fp8,0,0.03102239966392517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,2,128,1,fp8,fp8,0,0.03123359978199005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,4,128,1,float16,float16,0,0.030246400833129884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,1,128,1,float16,fp8,0,0.05360320210456848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,4,128,1,float16,fp8,0,0.031523200869560244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,4,128,1,fp8,fp8,0,0.03115360140800476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,2,128,1,float16,fp8,0,0.053529602289199826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,12,128,1,float16,float16,0,0.020763200521469117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,12,128,1,float16,fp8,0,0.02292319983243942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,1,128,1,float16,float16,0,0.018718400597572328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,1,128,1,float16,fp8,0,0.020926399528980254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,1,128,1,fp8,fp8,0,0.02067680060863495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,2,128,1,float16,float16,0,0.018753600120544434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,2,128,1,float16,fp8,0,0.020788800716400147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,2,128,1,fp8,fp8,0,0.02080959975719452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,4,128,1,float16,float16,0,0.01870719939470291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,4,128,1,float16,fp8,0,0.020796799659729005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,12,128,1,float16,float16,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,12,128,1,float16,fp8,0,0.016711999475955964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,1,128,1,float16,float16,0,0.014616000652313232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,12,128,1,fp8,fp8,0,0.01706240028142929
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,1,128,1,float16,fp8,0,0.015809600055217744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,1,128,1,fp8,fp8,0,0.017131200432777403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,2,128,1,float16,float16,0,0.014620800316333771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,2,128,1,fp8,fp8,0,0.0161423996090889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,2,128,1,float16,fp8,0,0.01693439930677414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,4,128,1,float16,fp8,0,0.016991999745368958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,4,128,1,float16,float16,0,0.014657600224018097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,12,128,1,float16,float16,0,0.012664000689983367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,4,128,1,fp8,fp8,0,0.01687999963760376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,12,128,1,float16,fp8,0,0.012574400007724761
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,12,128,1,fp8,fp8,0,0.012942400574684144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,1,128,1,float16,float16,0,0.011649599671363831
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,1,128,1,fp8,fp8,0,0.012547199428081513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,2,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,2,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,12,128,1,fp8,fp8,0,0.02287999987602234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,2,128,1,fp8,fp8,0,0.012668800354003907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,4,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,4,128,1,float16,fp8,0,0.012539200484752655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,12,128,1,float16,float16,0,0.012588800489902496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,12,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,4,128,1,fp8,fp8,0,0.02069759964942932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,12,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,1,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,1,128,1,float16,fp8,0,0.010675200074911118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,1,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,2,128,1,float16,float16,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,2,128,1,fp8,fp8,0,0.01058719977736473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,4,128,1,float16,float16,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,4,128,1,float16,fp8,0,0.010527999699115753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,4,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,12,128,1,float16,float16,0,0.010598400235176086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,12,128,1,float16,fp8,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,12,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,1,128,1,float16,float16,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,1,128,1,float16,fp8,0,0.010593599826097488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,1,128,1,fp8,fp8,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,2,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,2,128,1,float16,fp8,0,0.010639999806880952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,2,128,1,fp8,fp8,0,0.010660800337791442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,4,128,1,fp8,fp8,0,0.012668800354003907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,4,128,1,float16,fp8,0,0.01077279970049858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,4,128,1,fp8,fp8,0,0.010787200182676315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,12,128,1,float16,float16,0,0.011163199692964554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,12,128,1,float16,fp8,0,0.010760000348091126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,12,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,1,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,2,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,1,128,1,float16,fp8,0,0.010635200142860412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,1,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,2,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,2,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,2,128,1,fp8,fp8,0,0.010649599879980088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,4,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,4,128,1,float16,fp8,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,4,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,1,128,1,float16,float16,0,0.07339040040969849
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,1,128,1,float16,fp8,0,0.07801280021667481
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,1,128,1,fp8,fp8,0,0.07820159792900086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,2,128,1,float16,float16,0,0.07409759759902954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,2,128,1,float16,fp8,0,0.07835839986801148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,2,128,1,fp8,fp8,0,0.0787663996219635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,4,128,1,float16,float16,0,0.07622720003128051
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,4,128,1,float16,fp8,0,0.07888479828834534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,4,128,1,fp8,fp8,0,0.07826560139656066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,4,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,12,128,1,float16,float16,0,0.045577600598335266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,12,128,1,float16,fp8,0,0.047995200753211974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,12,128,1,fp8,fp8,0,0.049409601092338565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,1,128,1,float16,fp8,0,0.04349440038204193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,1,128,1,float16,float16,0,0.04140479862689972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,1,128,1,fp8,fp8,0,0.04347519874572754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,2,128,1,float16,float16,0,0.04127359986305237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,2,128,1,float16,fp8,0,0.04380959868431091
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,2,128,1,fp8,fp8,0,0.04339359998703003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,4,128,1,float16,float16,0,0.04343999922275543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,4,128,1,float16,fp8,0,0.04336479902267456
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,12,128,1,float16,float16,0,0.027009600400924684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,4,128,1,fp8,fp8,0,0.04496000111103058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,12,128,1,float16,fp8,0,0.02898240089416504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,12,128,1,fp8,fp8,0,0.02924480140209198
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,1,128,1,float16,float16,0,0.02502399981021881
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,1,128,1,float16,fp8,0,0.02712000012397766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,1,128,1,fp8,fp8,0,0.02696479856967926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,2,128,1,float16,float16,0,0.02677760124206543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,2,128,1,float16,fp8,0,0.02696000039577484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,2,128,1,fp8,fp8,0,0.026915198564529418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,1,128,1,float16,fp8,0,0.012729600071907043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,4,128,1,float16,fp8,0,0.02704479992389679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,4,128,1,fp8,fp8,0,0.027127999067306518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,12,128,1,float16,float16,0,0.018777599930763243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,12,128,1,float16,fp8,0,0.018911999464035035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,12,128,1,fp8,fp8,0,0.0188511997461319
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,1,128,1,float16,float16,0,0.017430399358272553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,1,128,1,float16,fp8,0,0.018783999979496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,1,128,1,fp8,fp8,0,0.018620799481868743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,2,128,1,float16,fp8,0,0.018718400597572328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,2,128,1,fp8,fp8,0,0.018806399405002595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,4,128,1,float16,float16,0,0.01716960072517395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,4,128,1,float16,fp8,0,0.018852800130844116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,12,128,1,float16,float16,0,0.014684799313545226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,12,128,1,float16,fp8,0,0.014747199416160584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,12,128,1,fp8,fp8,0,0.014608000218868256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,1,128,1,float16,float16,0,0.014560000598430633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,1,128,1,float16,fp8,0,0.014734399318695069
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,1,128,1,fp8,fp8,0,0.014678399264812469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,4,128,1,float16,float16,0,0.026950401067733765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,2,128,1,float16,float16,0,0.014779199659824372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,2,128,1,fp8,fp8,0,0.014688000082969666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,2,128,1,float16,fp8,0,0.014628799259662628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,4,128,1,float16,float16,0,0.014636799693107605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,4,128,1,float16,fp8,0,0.01462240070104599
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,4,128,1,fp8,fp8,0,0.014735999703407287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,12,128,1,float16,float16,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,12,128,1,float16,fp8,0,0.012539200484752655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,2,128,1,float16,float16,0,0.016787199676036833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,1,128,1,float16,float16,0,0.010655999928712846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,1,128,1,float16,fp8,0,0.010622400045394897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,1,128,1,fp8,fp8,0,0.01072319969534874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,2,128,1,float16,float16,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,2,128,1,float16,fp8,0,0.010779199749231338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,4,128,1,float16,float16,0,0.010623999685049058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,4,128,1,float16,fp8,0,0.01104160025715828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,4,128,1,fp8,fp8,0,0.0107744000852108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,12,128,1,float16,float16,0,0.010812799632549285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,12,128,1,float16,fp8,0,0.010667199641466141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,12,128,1,fp8,fp8,0,0.01067039966583252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,1,128,1,float16,float16,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,1,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,1,128,1,fp8,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,2,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,2,128,1,float16,fp8,0,0.010598400235176086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,2,128,1,fp8,fp8,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,4,128,1,float16,float16,0,0.010583999752998351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,4,128,1,float16,fp8,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,12,128,1,fp8,fp8,0,0.010567999631166457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,4,128,1,fp8,fp8,0,0.010603199899196624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,12,128,1,float16,fp8,0,0.010715200006961823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,12,128,1,fp8,fp8,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,2,128,1,fp8,fp8,0,0.010846400260925293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,1,128,1,float16,float16,0,0.01064319983124733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,1,128,1,float16,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,1,128,1,fp8,fp8,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,2,128,1,float16,float16,0,0.010567999631166457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,2,128,1,float16,fp8,0,0.010755199939012527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,4,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,4,128,1,fp8,fp8,0,0.01871040016412735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,4,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,4,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,12,128,1,float16,float16,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,12,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,12,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,1,128,1,float16,fp8,0,0.010728000104427338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,2,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,2,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,2,128,1,fp8,fp8,0,0.010599999874830245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,4,128,1,float16,float16,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,12,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,4,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,4,128,1,fp8,fp8,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,1,128,1,float16,float16,0,0.07005599737167359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,2,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,1,128,1,float16,fp8,0,0.06796320080757141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,1,128,1,fp8,fp8,0,0.06993439793586731
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,2,128,1,float16,float16,0,0.07008799910545349
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,2,128,1,float16,fp8,0,0.06981920003890991
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,2,128,1,fp8,fp8,0,0.06823359727859497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,4,128,1,float16,float16,0,0.07208319902420043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,4,128,1,fp8,fp8,0,0.06974560022354126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,12,128,1,float16,float16,0,0.041283199191093446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,12,128,1,float16,fp8,0,0.04129599928855896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,12,128,1,fp8,fp8,0,0.041252800822258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,1,128,1,float16,float16,0,0.039550399780273436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,1,128,1,float16,fp8,0,0.0390639990568161
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,1,128,1,fp8,fp8,0,0.039262399077415466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,2,128,1,float16,float16,0,0.03982079923152924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,2,128,1,float16,fp8,0,0.03930239975452423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,2,128,1,fp8,fp8,0,0.03930239975452423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,4,128,1,float16,float16,0,0.04126879870891571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,4,128,1,float16,fp8,0,0.03925440013408661
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,4,128,1,fp8,fp8,0,0.03927839994430542
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,12,128,1,float16,float16,0,0.02613919973373413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,12,128,1,float16,fp8,0,0.026547199487686156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,12,128,1,fp8,fp8,0,0.02677760124206543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,1,128,1,float16,float16,0,0.024799999594688416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,1,128,1,float16,fp8,0,0.024852800369262695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,1,128,1,fp8,fp8,0,0.024833600223064422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,2,128,1,float16,fp8,0,0.024899199604988098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,2,128,1,float16,float16,0,0.024872000515460967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,4,128,1,float16,float16,0,0.02484000027179718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,2,128,1,fp8,fp8,0,0.025006398558616638
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,4,128,1,float16,fp8,0,0.024846400320529937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,12,128,1,float16,float16,0,0.01765120029449463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,4,128,1,fp8,fp8,0,0.025323200225830077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,12,128,1,fp8,fp8,0,0.016864000260829924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,1,128,1,float16,float16,0,0.01709440052509308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,1,128,1,fp8,fp8,0,0.017132799327373504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,2,128,1,float16,float16,0,0.01723040044307709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,2,128,1,float16,fp8,0,0.01717440038919449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,2,128,1,fp8,fp8,0,0.017071999609470367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,4,128,1,float16,float16,0,0.017100800573825837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,4,128,1,float16,fp8,0,0.01725279986858368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,4,128,1,fp8,fp8,0,0.017179200053215028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,12,128,1,float16,float16,0,0.015052799880504609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,12,128,1,float16,fp8,0,0.014844800531864166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,4,128,1,float16,fp8,0,0.0680191993713379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,12,128,1,fp8,fp8,0,0.01480959951877594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,1,128,1,float16,float16,0,0.01255200058221817
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,1,128,1,float16,fp8,0,0.014767999947071075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,1,128,1,fp8,fp8,0,0.012676799297332763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,2,128,1,float16,float16,0,0.014764800667762756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,2,128,1,float16,fp8,0,0.012577599287033081
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,2,128,1,fp8,fp8,0,0.014737600088119506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,4,128,1,float16,float16,0,0.012649600207805634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,1,128,1,float16,fp8,0,0.016817599534988403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,4,128,1,float16,fp8,0,0.01422560065984726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,12,128,1,float16,float16,0,0.012494400143623352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,12,128,1,float16,fp8,0,0.010740800201892853
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,12,128,1,fp8,fp8,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,1,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,1,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,2,128,1,float16,float16,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,2,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,2,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,4,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,4,128,1,float16,fp8,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,4,128,1,fp8,fp8,0,0.010740800201892853
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,12,128,1,float16,float16,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,12,128,1,float16,fp8,0,0.010651200264692306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,12,128,1,fp8,fp8,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,1,128,1,float16,float16,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,1,128,1,float16,fp8,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,1,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,2,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,2,128,1,float16,fp8,0,0.010667199641466141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,2,128,1,fp8,fp8,0,0.010590399801731109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,12,128,1,float16,fp8,0,0.01668799966573715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,4,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,4,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,4,128,1,fp8,fp8,0,0.01061440035700798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,12,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,12,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,4,128,1,fp8,fp8,0,0.012775999307632447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,12,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,1,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,1,128,1,float16,fp8,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,2,128,1,float16,float16,0,0.010710400342941285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,2,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,1,128,1,float16,float16,0,0.010755199939012527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,4,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,4,128,1,float16,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,4,128,1,fp8,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,12,128,1,float16,float16,0,0.010699199885129929
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,12,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,12,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,1,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,1,128,1,float16,fp8,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,1,128,1,fp8,fp8,0,0.010622400045394897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,2,128,1,float16,float16,0,0.010894399881362916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,2,128,1,float16,fp8,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,2,128,1,fp8,fp8,0,0.01072480008006096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,4,128,1,float16,float16,0,0.01077599972486496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,4,128,1,float16,fp8,0,0.010681600123643876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,4,128,1,fp8,fp8,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,1,128,1,float16,float16,0,0.06961600184440613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,1,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,1,128,1,float16,fp8,0,0.06420959830284119
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,2,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,1,128,1,fp8,fp8,0,0.06378560066223145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,2,128,1,float16,float16,0,0.06948000192642212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,2,128,1,fp8,fp8,0,0.0637776017189026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,4,128,1,float16,float16,0,0.0690015971660614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,4,128,1,float16,fp8,0,0.06378080248832703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,4,128,1,fp8,fp8,0,0.06430879831314087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,0,0.03723520040512085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,12,128,1,fp8,fp8,0,0.03760640025138855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,1,128,1,float16,float16,0,0.03951199948787689
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,1,128,1,float16,fp8,0,0.03735359907150269
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,1,128,1,fp8,fp8,0,0.03745599985122681
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,2,128,1,float16,float16,0,0.03938559889793396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,2,128,1,float16,fp8,0,0.037319999933242795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,2,128,1,fp8,fp8,0,0.037539198994636536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,4,128,1,float16,float16,0,0.03952000141143799
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,2,128,1,float16,fp8,0,0.06390720009803771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,4,128,1,float16,fp8,0,0.03739520013332367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,4,128,1,fp8,fp8,0,0.03716480135917664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,12,128,1,fp8,fp8,0,0.02314079999923706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,0,0.025255998969078063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,0,0.023396800458431243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,1,128,1,float16,float16,0,0.024911999702453613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,0,0.03913759887218475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,1,128,1,float16,fp8,0,0.023078399896621703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,2,128,1,float16,float16,0,0.024886399507522583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,2,128,1,float16,fp8,0,0.022830399870872497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,2,128,1,fp8,fp8,0,0.02346239984035492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,4,128,1,float16,fp8,0,0.02279040068387985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,4,128,1,fp8,fp8,0,0.02479040026664734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,0,0.016838400065898894
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,12,128,1,fp8,fp8,0,0.016816000640392303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,1,128,1,float16,float16,0,0.01679999977350235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,1,128,1,float16,fp8,0,0.016777600347995757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,1,128,1,fp8,fp8,0,0.016646400094032288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,2,128,1,float16,float16,0,0.01677280068397522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,2,128,1,float16,fp8,0,0.021209600567817687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,2,128,1,fp8,fp8,0,0.016646400094032288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,4,128,1,float16,float16,0,0.016787199676036833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,4,128,1,float16,fp8,0,0.01653759926557541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,4,128,1,fp8,fp8,0,0.01656319946050644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,0,0.01454399973154068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,0,0.012670400738716125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,12,128,1,fp8,fp8,0,0.012726399302482604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,1,128,1,float16,float16,0,0.014407999813556671
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,4,128,1,float16,float16,0,0.024908800423145295
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,1,128,1,float16,fp8,0,0.012963199615478515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,1,128,1,fp8,fp8,0,0.012600000202655792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,0,0.016572800278663636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,2,128,1,float16,float16,0,0.01451520025730133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,2,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,4,128,1,float16,float16,0,0.014451199769973755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,4,128,1,float16,fp8,0,0.012555199861526489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,0,0.012483199685811996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,12,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,1,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,1,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,1,128,1,fp8,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,2,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,2,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,2,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,4,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,4,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,4,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,12,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,1,128,1,fp8,fp8,0,0.02306399941444397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,1,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,2,128,1,float16,fp8,0,0.012831999361515046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,2,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,2,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,2,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,4,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,0,0.010604800283908844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,12,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,1,128,1,float16,float16,0,0.010636799782514573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,1,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,1,128,1,fp8,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,2,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,2,128,1,float16,fp8,0,0.010585600137710571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,2,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,4,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,4,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,4,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,4,128,1,float16,float16,0,0.01072160005569458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,4,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,0,0.010622400045394897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,1,128,1,float16,float16,0,0.010790400207042694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,1,128,1,float16,fp8,0,0.010592000186443329
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,1,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,2,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,2,128,1,float16,fp8,0,0.01061440035700798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,2,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,4,128,1,float16,float16,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,4,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,4,128,1,fp8,fp8,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,4,128,1,fp8,fp8,0,0.012768000364303589
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,12,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,float16,0,3.279692840576172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,fp8,0,2.757526397705078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,1,128,1,fp8,fp8,0,2.752035140991211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,fp8,0,2.7728143692016602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,2,128,1,fp8,fp8,0,2.731625556945801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,float16,0,4.536312103271484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,float16,0,4.305070495605468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,fp8,0,2.7824560165405274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,4,128,1,fp8,fp8,0,2.77050724029541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,float16,0,2.247257614135742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,fp8,0,1.689072036743164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,8,128,1,fp8,fp8,0,1.4723855972290039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,float16,0,1.5696864128112793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,1,128,1,fp8,fp8,0,1.4452816009521485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,fp8,0,1.9557439804077148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,float16,0,1.819887924194336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,fp8,0,1.4647104263305664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,2,128,1,fp8,fp8,0,1.451961612701416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,float16,0,1.6082880020141601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,fp8,0,1.5638031959533691
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,4,128,1,fp8,fp8,0,1.4268223762512207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,fp8,0,0.9640128135681152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,fp8,0,0.8132960319519043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,8,128,1,fp8,fp8,0,0.7934751987457276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,1,128,1,fp8,fp8,0,0.783289623260498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,float16,0,0.8844016075134278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,fp8,0,0.7827231884002686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,2,128,1,fp8,fp8,0,0.8026639938354492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,float16,0,0.9284928321838379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,float16,0,0.8802927970886231
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,fp8,0,0.8324527740478516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,float16,0,0.5256576061248779
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,fp8,0,0.48578882217407227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,4,128,1,fp8,fp8,0,0.7921487808227539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,float16,0,0.8635871887207032
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,8,128,1,fp8,fp8,0,0.4657440185546875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,float16,0,0.5116960048675537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,1,128,1,fp8,fp8,0,0.4561728000640869
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,float16,0,0.5185840129852295
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,fp8,0,0.4633312225341797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,2,128,1,fp8,fp8,0,0.4561279773712158
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,float16,0,0.5175007820129395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,fp8,0,0.46292800903320314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,4,128,1,fp8,fp8,0,0.454863977432251
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,fp8,0,1.6437343597412108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,float16,0,1.8299200057983398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,fp8,0,0.4640480041503906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,float16,0,1.9051664352416993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,fp8,0,2.2749168395996096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,2,128,1,fp8,fp8,0,1.6374719619750977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,1,128,1,fp8,fp8,0,1.6238880157470703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,fp8,0,1.6223888397216797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,4,128,1,fp8,fp8,0,1.6863359451293944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,float16,0,0.987332820892334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,fp8,0,0.9066351890563965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,8,128,1,fp8,fp8,0,0.8809167861938476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,float16,0,0.9433728218078613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,fp8,0,0.8694944381713867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,1,128,1,fp8,fp8,0,0.8798975944519043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,fp8,0,0.8673503875732422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,float16,0,1.180622386932373
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,2,128,1,fp8,fp8,0,0.8758848190307618
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,float16,0,2.1950704574584963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,float16,0,0.9729791641235351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,fp8,0,0.900545597076416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,4,128,1,fp8,fp8,0,0.8794879913330078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,float16,0,0.5653791904449463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,fp8,0,0.523740816116333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,8,128,1,fp8,fp8,0,0.49151039123535156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,float16,0,0.5482240200042725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,fp8,0,0.4944784164428711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,1,128,1,fp8,fp8,0,0.5611408233642579
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,float16,0,0.5319952011108399
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,fp8,0,0.4941103935241699
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,2,128,1,fp8,fp8,0,0.4934800148010254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,fp8,0,0.48696160316467285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,float16,0,0.336247992515564
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,4,128,1,fp8,fp8,0,0.49584641456604006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,fp8,0,0.28947200775146487
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,8,128,1,fp8,fp8,0,0.2966687917709351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,float16,0,0.32041919231414795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,1,128,1,fp8,fp8,0,0.29690721035003664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,float16,0,0.321235203742981
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,fp8,0,0.29557440280914304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,2,128,1,fp8,fp8,0,0.2953007936477661
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,float16,0,0.32527999877929686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,fp8,0,0.2951807975769043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,4,128,1,fp8,fp8,0,0.2947200059890747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,float16,0,0.543339204788208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,float16,0,1.2914655685424805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,fp8,0,0.2900207996368408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,fp8,0,1.1803183555603027
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,1,128,1,fp8,fp8,0,1.2403231620788575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,float16,0,1.2675663948059082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,fp8,0,1.180793571472168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,2,128,1,fp8,fp8,0,1.1891311645507812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,float16,0,1.478105640411377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,fp8,0,1.2095919609069825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,4,128,1,fp8,fp8,0,1.1867839813232421
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,fp8,0,0.6420527935028076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,float16,0,1.0259200096130372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,8,128,1,fp8,fp8,0,0.6443664073944092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,float16,0,0.6975552082061768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,fp8,0,0.6940896034240722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,1,128,1,fp8,fp8,0,0.6465888023376465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,float16,0,0.7165567874908447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,fp8,0,0.6363999843597412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,float16,0,0.7192255973815918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,fp8,0,0.8914223670959472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,4,128,1,fp8,fp8,0,0.6346735954284668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,float16,0,0.41930079460144043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,fp8,0,0.36693921089172366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,8,128,1,fp8,fp8,0,0.36492319107055665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,float16,0,0.39981279373168943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,fp8,0,0.3631295919418335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,1,128,1,fp8,fp8,0,0.36193759441375734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,float16,0,0.39974079132080076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,fp8,0,0.3639120101928711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,2,128,1,fp8,fp8,0,0.3638672113418579
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,float16,0,0.4072688102722168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,fp8,0,0.3629215955734253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,4,128,1,fp8,fp8,0,0.3626607894897461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,float16,0,0.25484158992767336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,fp8,0,0.2253472089767456
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,8,128,1,fp8,fp8,0,0.22416160106658936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,float16,0,0.24413599967956542
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,fp8,0,0.22517919540405273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,1,128,1,fp8,fp8,0,0.22542879581451417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,float16,0,0.24371039867401123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,fp8,0,0.22520320415496825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,2,128,1,fp8,fp8,0,0.22453439235687256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,float16,0,0.24759359359741212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,2,128,1,fp8,fp8,0,0.6494639873504638
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,fp8,0,0.22500479221343994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,4,128,1,fp8,fp8,0,0.22464799880981445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,float16,0,1.7092672348022462
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,1,128,1,fp8,fp8,0,1.5313776016235352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,float16,0,1.6073503494262695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,2,128,1,fp8,fp8,0,1.5423055648803712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,fp8,0,1.7124143600463868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,float16,0,2.1169599533081054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,fp8,0,1.5413248062133789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,float16,0,1.0764320373535157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,4,128,1,fp8,fp8,0,1.5370944023132325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,fp8,0,1.5429632186889648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,fp8,0,0.8100319862365722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,8,128,1,fp8,fp8,0,0.9049839973449707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,fp8,0,0.8176239967346192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,1,128,1,fp8,fp8,0,0.8085616111755372
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,float16,0,0.8589952468872071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,fp8,0,0.8113056182861328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,2,128,1,fp8,fp8,0,0.8065183639526368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,float16,0,1.0454976081848144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,fp8,0,0.8090928077697754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,4,128,1,fp8,fp8,0,0.8042976379394531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,float16,0,0.5906288146972656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,fp8,0,0.44904317855834963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,8,128,1,fp8,fp8,0,0.44509282112121584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,float16,0,0.4912399768829346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,fp8,0,0.44936480522155764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,1,128,1,fp8,fp8,0,0.445308780670166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,float16,0,0.8768416404724121
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,float16,0,0.4788832187652588
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,fp8,0,0.4505887985229492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,2,128,1,fp8,fp8,0,0.445358419418335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,float16,0,0.49396481513977053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,fp8,0,0.4500607967376709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,float16,0,0.2920207977294922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,fp8,0,0.26284959316253664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,4,128,1,fp8,fp8,0,0.44784321784973147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,8,128,1,fp8,fp8,0,0.25906240940093994
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,fp8,0,0.26178879737854005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,1,128,1,fp8,fp8,0,0.259331202507019
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,float16,0,0.2728384017944336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,fp8,0,0.26308479309082033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,2,128,1,fp8,fp8,0,0.260481595993042
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,float16,0,0.27975199222564695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,fp8,0,0.2602560043334961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,float16,0,0.17987040281295777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,8,128,1,fp8,fp8,0,0.1622655987739563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,float16,0,0.1730847954750061
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,fp8,0,0.16230080127716065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,1,128,1,fp8,fp8,0,0.1626960039138794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,float16,0,0.17373280525207518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,fp8,0,0.16221120357513427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,float16,0,0.2732255935668945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,2,128,1,fp8,fp8,0,0.1639664053916931
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,float16,0,0.17719199657440185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,fp8,0,0.1640544056892395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,4,128,1,fp8,fp8,0,0.2609616041183472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,4,128,1,fp8,fp8,0,0.1642799973487854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,float16,0,0.9934032440185547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,fp8,0,0.9350095748901367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,1,128,1,fp8,fp8,0,0.9421135902404785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,float16,0,0.9724495887756348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,fp8,0,0.9475055694580078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,fp8,0,0.16503520011901857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,2,128,1,fp8,fp8,0,1.0322943687438966
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,float16,0,1.0369999885559082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,fp8,0,0.942409610748291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,4,128,1,fp8,fp8,0,0.9320367813110352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,float16,0,0.5642496109008789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,8,128,1,fp8,fp8,0,0.5060272216796875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,float16,0,0.5279280185699463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,fp8,0,0.5013999938964844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,1,128,1,fp8,fp8,0,0.5083807945251465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,float16,0,0.5278863906860352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,2,128,1,fp8,fp8,0,0.507204818725586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,fp8,0,0.5002624034881592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,float16,0,0.5902175903320312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,fp8,0,0.5044559955596923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,float16,0,0.3710927963256836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,fp8,0,0.283515191078186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,8,128,1,fp8,fp8,0,0.3512720108032227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,float16,0,0.29986560344696045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,fp8,0,0.2819008111953735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,1,128,1,fp8,fp8,0,0.28324639797210693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,float16,0,0.29528961181640623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,fp8,0,0.2835263967514038
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,2,128,1,fp8,fp8,0,0.279367995262146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,float16,0,0.3075040102005005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,fp8,0,0.27933440208435056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,4,128,1,fp8,fp8,0,0.2839855909347534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,float16,0,0.1860368013381958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,fp8,0,0.16841119527816772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,8,128,1,fp8,fp8,0,0.16346880197525024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,float16,0,0.17463040351867676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,1,128,1,fp8,fp8,0,0.16671040058135986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,4,128,1,fp8,fp8,0,0.4995744228363037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,fp8,0,0.1642240047454834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,2,128,1,fp8,fp8,0,0.1644255995750427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,float16,0,0.1768656015396118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,fp8,0,0.1643504023551941
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,fp8,0,0.5066351890563965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,4,128,1,fp8,fp8,0,0.1647264003753662
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,fp8,0,0.10493119955062866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,8,128,1,fp8,fp8,0,0.1047584056854248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,float16,0,0.10913920402526855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,fp8,0,0.10482239723205566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,1,128,1,fp8,fp8,0,0.1048367977142334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,fp8,0,0.16353919506072997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,float16,0,0.11010880470275879
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,fp8,0,0.10446560382843018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,2,128,1,fp8,fp8,0,0.10535520315170288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,float16,0,0.11128319501876831
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,float16,0,0.17462719678878785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,fp8,0,0.10476160049438477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,4,128,1,fp8,fp8,0,0.10479680299758912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,float16,0,0.9250288009643555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,fp8,0,0.9132191658020019
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,1,128,1,fp8,fp8,0,0.9174112319946289
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,float16,0,0.11727999448776245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,float16,0,0.9659040451049805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,fp8,0,0.912662410736084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,2,128,1,fp8,fp8,0,0.9152912139892578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,float16,0,0.544817590713501
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,float16,0,0.9714287757873535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,4,128,1,fp8,fp8,0,0.9128000259399414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,fp8,0,1.2060784339904784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,fp8,0,0.48180160522460935
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,8,128,1,fp8,fp8,0,0.48473119735717773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,float16,0,0.4849855899810791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,1,128,1,fp8,fp8,0,0.48956642150878904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,float16,0,0.5155087947845459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,fp8,0,0.4821616172790527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,2,128,1,fp8,fp8,0,0.48418560028076174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,float16,0,0.574399995803833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,4,128,1,fp8,fp8,0,0.48283681869506834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,float16,0,0.3324352025985718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,fp8,0,0.2646912097930908
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,8,128,1,fp8,fp8,0,0.2663952112197876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,float16,0,0.26854400634765624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,fp8,0,0.2687391996383667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,1,128,1,fp8,fp8,0,0.26473441123962405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,float16,0,0.2734031915664673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,2,128,1,fp8,fp8,0,0.26774399280548095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,fp8,0,0.4958000183105469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,float16,0,0.28247039318084716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,fp8,0,0.2678832054138184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,4,128,1,fp8,fp8,0,0.2692015886306763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,float16,0,0.174726402759552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,fp8,0,0.4803647994995117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,fp8,0,0.1544543981552124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,8,128,1,fp8,fp8,0,0.15336159467697144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,float16,0,0.1522063970565796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,1,128,1,fp8,fp8,0,0.1543071985244751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,float16,0,0.15620959997177125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,fp8,0,0.1522704005241394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,2,128,1,fp8,fp8,0,0.1521504044532776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,float16,0,0.16194080114364623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,fp8,0,0.1523311972618103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,fp8,0,0.2650320053100586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,4,128,1,fp8,fp8,0,0.15238879919052123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,float16,0,0.10548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,fp8,0,0.09552639722824097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,8,128,1,fp8,fp8,0,0.0947376012802124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,float16,0,0.09857760071754455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,1,128,1,fp8,fp8,0,0.09565280079841613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,float16,0,0.09810400009155273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,fp8,0,0.09472159743309021
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,2,128,1,fp8,fp8,0,0.09471039772033692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,float16,0,0.10008000135421753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,fp8,0,0.15187679529190062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,fp8,0,0.09523839950561523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,4,128,1,fp8,fp8,0,0.09527360200881958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,float16,0,0.064411199092865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,fp8,0,0.060252797603607175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,8,128,1,fp8,fp8,0,0.060222399234771726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,float16,0,0.06177440285682678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,1,128,1,fp8,fp8,0,0.061140799522399904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,float16,0,0.06176959872245789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,fp8,0,0.06028320193290711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,2,128,1,fp8,fp8,0,0.06107519865036011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,float16,0,0.06263520121574402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,fp8,0,0.06121600270271301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,4,128,1,fp8,fp8,0,0.05974559783935547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,fp8,0,0.09471679925918579
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,float16,0,0.5747776031494141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,fp8,0,0.5823855876922608
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,1,128,1,fp8,fp8,0,0.580452823638916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,float16,0,0.5706287860870362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,fp8,0,0.5825984001159668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,2,128,1,fp8,fp8,0,0.5791232109069824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,fp8,0,0.5798639774322509
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,fp8,0,0.061375999450683595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,float16,0,0.5995584011077881
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,4,128,1,fp8,fp8,0,0.5780943870544434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,float16,0,0.34595680236816406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,fp8,0,0.31289920806884763
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,float16,0,0.3022320032119751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,fp8,0,0.3141855955123901
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,1,128,1,fp8,fp8,0,0.31567039489746096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,fp8,0,0.3105072021484375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,2,128,1,fp8,fp8,0,0.3148191928863525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,float16,0,0.31544640064239504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,fp8,0,0.31336159706115724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,4,128,1,fp8,fp8,0,0.31002240180969237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,float16,0,0.19384959936141968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,fp8,0,0.17282719612121583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,8,128,1,fp8,fp8,0,0.17319680452346803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,float16,0,0.17014080286026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,fp8,0,0.17227519750595094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,1,128,1,fp8,fp8,0,0.1711199998855591
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,float16,0,0.17071199417114258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,fp8,0,0.17191359996795655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,2,128,1,fp8,fp8,0,0.17346400022506714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,float16,0,0.17851200103759765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,4,128,1,fp8,fp8,0,0.17282400131225586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,8,128,1,fp8,fp8,0,0.3120287895202637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,float16,0,0.11283999681472778
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,fp8,0,0.10096479654312134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,float16,0,0.30007998943328856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,float16,0,0.10043200254440307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,fp8,0,0.09860799908638
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,1,128,1,fp8,fp8,0,0.10061119794845581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,float16,0,0.0981007993221283
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,fp8,0,0.10081119537353515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,2,128,1,fp8,fp8,0,0.0993183970451355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,float16,0,0.10492160320281982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,fp8,0,0.09886720180511474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,4,128,1,fp8,fp8,0,0.10079519748687744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,float16,0,0.06815199851989746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,fp8,0,0.06388159990310668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,8,128,1,fp8,fp8,0,0.06363999843597412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,float16,0,0.06383200287818909
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,fp8,0,0.06328960061073304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,float16,0,0.06339840292930603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,fp8,0,0.06372640132904053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,2,128,1,fp8,fp8,0,0.06347519755363465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,float16,0,0.064547199010849
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,fp8,0,0.06267840266227723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,fp8,0,0.1741984009742737
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,4,128,1,fp8,fp8,0,0.062377601861953735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,float16,0,0.04988000094890595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,fp8,0,0.04737280011177063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,8,128,1,fp8,fp8,0,0.04759680032730103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,fp8,0,0.04742079973220825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,1,128,1,fp8,fp8,0,0.04738560020923614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,float16,0,0.04740799963474274
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,fp8,0,0.04754079878330231
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,2,128,1,fp8,fp8,0,0.047654399275779726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,float16,0,0.04825440049171448
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,fp8,0,0.047419199347496034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,4,128,1,fp8,fp8,0,0.0474480003118515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,1,128,1,fp8,fp8,0,0.06376799941062927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,float16,0,0.5610047817230225
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,fp8,0,0.6081888198852539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,8,128,1,fp8,fp8,0,0.09919360280036926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,1,128,1,fp8,fp8,0,0.6086783885955811
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,float16,0,0.5459743976593018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,fp8,0,0.6075200080871582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,2,128,1,fp8,fp8,0,0.5996528148651123
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,float16,0,0.5911568164825439
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,fp8,0,0.599123191833496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,float16,0,0.3466880083084106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,4,128,1,fp8,fp8,0,0.6061535835266113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,fp8,0,0.31525919437408445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,8,128,1,fp8,fp8,0,0.31943199634552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,float16,0,0.29266560077667236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,float16,0,0.04730879962444305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,fp8,0,0.32194879055023196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,1,128,1,fp8,fp8,0,0.31958398818969724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,float16,0,0.2900223970413208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,fp8,0,0.32094879150390626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,2,128,1,fp8,fp8,0,0.31912639141082766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,float16,0,0.3096400022506714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,fp8,0,0.31994879245758057
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,4,128,1,fp8,fp8,0,0.3175584077835083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,float16,0,0.18945280313491822
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,fp8,0,0.17393440008163452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,8,128,1,fp8,fp8,0,0.17229119539260865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,float16,0,0.16332000494003296
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,fp8,0,0.17229759693145752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,1,128,1,fp8,fp8,0,0.17339999675750734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,float16,0,0.15964640378952027
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,fp8,0,0.1743280053138733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,2,128,1,fp8,fp8,0,0.1736207962036133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,float16,0,0.172816002368927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,fp8,0,0.09853280186653138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,8,128,1,fp8,fp8,0,0.09704480171203614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,float16,0,0.0913968026638031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,fp8,0,0.09648640155792236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,1,128,1,fp8,fp8,0,0.09661120176315308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,float16,0,0.09246559739112854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,fp8,0,0.09628639817237854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,2,128,1,fp8,fp8,0,0.09659519791603088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,float16,0,0.09824320077896118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,fp8,0,0.09655359983444214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,4,128,1,fp8,fp8,0,0.09655519723892211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,float16,0,0.06592320203781128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,fp8,0,0.05980960130691528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,8,128,1,fp8,fp8,0,0.05966079831123352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,float16,0,0.05767040252685547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,fp8,0,0.17058559656143188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,fp8,0,0.059875202178955075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,1,128,1,fp8,fp8,0,0.059975999593734744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,float16,0,0.10848799943923951
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,float16,0,0.057903999090194704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,fp8,0,0.059648001194000246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,2,128,1,fp8,fp8,0,0.05957919955253601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,float16,0,0.06024479866027832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,float16,0,0.04079360067844391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,4,128,1,fp8,fp8,0,0.05971999764442444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,fp8,0,0.03913280069828033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,float16,0,0.03706560134887695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,fp8,0,0.03869599997997284
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,1,128,1,fp8,fp8,0,0.03914879858493805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,float16,0,0.03711200058460236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,fp8,0,0.037462401390075686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,2,128,1,fp8,fp8,0,0.03716639876365661
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,float16,0,0.037268799543380735
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,fp8,0,0.03758560121059418
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,4,128,1,fp8,fp8,0,0.037678399682044984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,float16,0,0.037092798948287965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,fp8,0,0.03518880009651184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,8,128,1,fp8,fp8,0,0.035174399614334106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,float16,0,0.03518719971179962
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,fp8,0,0.0351936012506485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,1,128,1,fp8,fp8,0,0.03519999980926514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,4,128,1,fp8,fp8,0,0.17438080310821533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,float16,0,0.03504000008106232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,fp8,0,0.035006400942802426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,fp8,0,0.05963839888572693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,float16,0,0.03515200018882751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,fp8,0,0.035020801424980166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,4,128,1,fp8,fp8,0,0.034964799880981445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,8,128,1,fp8,fp8,0,0.03716480135917664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,float16,0,0.35588960647583007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,fp8,0,0.4061823844909668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,1,128,1,fp8,fp8,0,0.40007362365722654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,float16,0,0.350379204750061
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,fp8,0,0.40405120849609377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,2,128,1,fp8,fp8,0,0.4001039981842041
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,float16,0,0.3779295921325684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,fp8,0,0.40302238464355467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,4,128,1,fp8,fp8,0,0.3990959882736206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,fp8,0,0.213755202293396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,8,128,1,fp8,fp8,0,0.21151359081268312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,float16,0,0.1923792004585266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,fp8,0,0.21224160194396974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,1,128,1,fp8,fp8,0,0.2154560089111328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,float16,0,0.18838560581207275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,fp8,0,0.2141711950302124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,2,128,1,fp8,fp8,0,0.2115407943725586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,2,128,1,fp8,fp8,0,0.035155200958251955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,float16,0,0.20676159858703613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,fp8,0,0.2116015911102295
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,float16,0,0.12804640531539918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,4,128,1,fp8,fp8,0,0.21409120559692382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,fp8,0,0.11656639575958253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,8,128,1,fp8,fp8,0,0.11715840101242066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,float16,0,0.10332319736480713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,fp8,0,0.11538399457931518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,1,128,1,fp8,fp8,0,0.11388959884643554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,float16,0,0.10748640298843384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,fp8,0,0.11499680280685425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,2,128,1,fp8,fp8,0,0.11669759750366211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,float16,0,0.23206400871276855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,float16,0,0.11300640106201172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,fp8,0,0.11688159704208374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,4,128,1,fp8,fp8,0,0.11555999517440796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,float16,0,0.07533280253410339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,8,128,1,fp8,fp8,0,0.06774399876594543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,fp8,0,0.06822720170021057
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,float16,0,0.0619983971118927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,fp8,0,0.06661760210990905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,1,128,1,fp8,fp8,0,0.06817280054092408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,float16,0,0.062352001667022705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,fp8,0,0.06693120002746582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,2,128,1,fp8,fp8,0,0.06813600063323974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,fp8,0,0.06675040125846862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,float16,0,0.04557119905948639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,4,128,1,fp8,fp8,0,0.06805599927902221
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,fp8,0,0.04323840141296387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,8,128,1,fp8,fp8,0,0.04359839856624603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,float16,0,0.041259199380874634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,fp8,0,0.04338400065898895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,float16,0,0.0414463996887207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,fp8,0,0.10625439882278442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,2,128,1,fp8,fp8,0,0.04355359971523285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,float16,0,0.043638399243354796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,fp8,0,0.04355520009994507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,4,128,1,fp8,fp8,0,0.043505600094795226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,float16,0,0.033185601234436035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,fp8,0,0.03116160035133362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,8,128,1,fp8,fp8,0,0.03115839958190918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,float16,0,0.030379199981689455
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,1,128,1,fp8,fp8,0,0.03105120062828064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,float16,0,0.029947200417518617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,fp8,0,0.0312608003616333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,2,128,1,fp8,fp8,0,0.031033599376678468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,float16,0,0.030902400612831116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,fp8,0,0.03128640055656433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,4,128,1,fp8,fp8,0,0.03095200061798096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,float16,0,0.030910399556159974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,fp8,0,0.029017600417137145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,8,128,1,fp8,fp8,0,0.02908160090446472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,float16,0,0.029291200637817382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,fp8,0,0.029123198986053467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,1,128,1,fp8,fp8,0,0.029039999842643736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,float16,0,0.02900159955024719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,fp8,0,0.029300799965858458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,2,128,1,fp8,fp8,0,0.029211199283599852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,float16,0,0.06678559780120849
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,float16,0,0.029023998975753786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,fp8,0,0.031249600648880004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,4,128,1,fp8,fp8,0,0.02889760136604309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,float16,0,0.37642080783843995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,1,128,1,fp8,fp8,0,0.04300639927387238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,fp8,0,0.4519983768463135
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,float16,0,0.3649840116500854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,1,128,1,fp8,fp8,0,0.44881281852722166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,fp8,0,0.4476111888885498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,2,128,1,fp8,fp8,0,0.44210081100463866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,fp8,0,0.44739680290222167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,float16,0,0.25420958995819093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,fp8,0,0.23373920917510987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,8,128,1,fp8,fp8,0,0.23379039764404297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,fp8,0,0.028891199827194215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,float16,0,0.2000351905822754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,fp8,0,0.23321280479431153
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,1,128,1,fp8,fp8,0,0.23572320938110353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,float16,0,0.19349119663238526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,fp8,0,0.23492000102996827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,2,128,1,fp8,fp8,0,0.2322848081588745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,float16,0,0.21644320487976074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,fp8,0,0.23201920986175537
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,4,128,1,fp8,fp8,0,0.23405759334564208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,float16,0,0.4109839916229248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,float16,0,0.13663519620895387
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,fp8,0,0.12747520208358765
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,float16,0,0.10551680326461792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,fp8,0,0.12549439668655396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,4,128,1,fp8,fp8,0,0.44130239486694334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,float16,0,0.10873600244522094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,fp8,0,0.12375520467758179
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,2,128,1,fp8,fp8,0,0.12577120065689087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,fp8,0,0.12598559856414795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,4,128,1,fp8,fp8,0,0.12645119428634644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,float16,0,0.08073440194129944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,fp8,0,0.07193599939346314
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,8,128,1,fp8,fp8,0,0.07192000150680541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,float16,0,0.06120960116386413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,fp8,0,0.06994879841804505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,1,128,1,fp8,fp8,0,0.06985120177268982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,8,128,1,fp8,fp8,0,0.12724159955978392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,float16,0,0.06133440136909485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,fp8,0,0.07026079893112183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,float16,0,0.1173408031463623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,2,128,1,fp8,fp8,0,0.07015839815139771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,float16,0,0.06739839911460876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,fp8,0,0.07024480104446411
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,4,128,1,fp8,fp8,0,0.06949599981307983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,fp8,0,0.04318560063838959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,8,128,1,fp8,fp8,0,0.043166399002075195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,float16,0,0.03903039991855621
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,fp8,0,0.04296320080757141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,1,128,1,fp8,fp8,0,0.043100801110267636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,float16,0,0.03893919885158539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,fp8,0,0.042556801438331605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,2,128,1,fp8,fp8,0,0.04254559874534607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,float16,0,0.04103040099143982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,fp8,0,0.042115199565887454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,4,128,1,fp8,fp8,0,0.04249120056629181
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,float16,0,0.02690559923648834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,fp8,0,0.026841598749160766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,8,128,1,fp8,fp8,0,0.026840001344680786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,float16,0,0.02480800002813339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,1,128,1,fp8,fp8,0,0.1244271993637085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,fp8,0,0.02683840095996857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,1,128,1,fp8,fp8,0,0.02690559923648834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,float16,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,fp8,0,0.02688319981098175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,float16,0,0.024879999458789825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,fp8,0,0.026849600672721862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,4,128,1,fp8,fp8,0,0.026824000477790832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,float16,0,0.047646400332450864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,float16,0,0.024803200364112855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,fp8,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,8,128,1,fp8,fp8,0,0.02476000040769577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,float16,0,0.022814400494098663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,1,128,1,fp8,fp8,0,0.024715200066566467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,float16,0,0.022832000255584718
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,fp8,0,0.02471359968185425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,2,128,1,fp8,fp8,0,0.024707199633121492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,float16,0,0.023024000227451324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,4,128,1,fp8,fp8,0,0.024784000217914583
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,fp8,0,0.025496000051498414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,float16,0,0.022951999306678773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,8,128,1,fp8,fp8,0,0.02290239930152893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,fp8,0,0.022787199914455415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,float16,0,0.02279199957847595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,1,128,1,fp8,fp8,0,0.022838400304317476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,fp8,0,0.023134399950504304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,float16,0,0.02292640060186386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,fp8,0,0.023158399760723113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,float16,0,0.02328319996595383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,fp8,0,0.02311519980430603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,2,128,1,fp8,fp8,0,0.026822400093078614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,4,128,1,fp8,fp8,0,0.022843199968338012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,1,128,1,float16,float16,0,0.27708001136779786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,1,128,1,float16,fp8,0,0.3657680034637451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,1,128,1,fp8,fp8,0,0.36260480880737306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,fp8,0,0.024721600115299225
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,2,128,1,float16,float16,0,0.27854559421539304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,2,128,1,float16,fp8,0,0.36349918842315676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,2,128,1,fp8,fp8,0,0.3609391927719116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,4,128,1,float16,float16,0,0.3165983915328979
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,4,128,1,float16,fp8,0,0.36094720363616944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,4,128,1,fp8,fp8,0,0.3588495969772339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,8,128,1,float16,float16,0,0.20562241077423096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,8,128,1,float16,fp8,0,0.18813600540161132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,8,128,1,fp8,fp8,0,0.18843040466308594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,1,128,1,float16,float16,0,0.1480080008506775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,1,128,1,float16,fp8,0,0.1886623978614807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,1,128,1,fp8,fp8,0,0.18865439891815186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,2,128,1,float16,fp8,0,0.18984639644622803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,2,128,1,float16,float16,0,0.147599995136261
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,2,128,1,fp8,fp8,0,0.18759039640426636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,4,128,1,float16,float16,0,0.16560319662094117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,4,128,1,fp8,fp8,0,0.18808319568634033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,4,128,1,float16,fp8,0,0.19001439809799195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,8,128,1,float16,float16,0,0.11029119491577148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,8,128,1,float16,fp8,0,0.10074880123138427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,8,128,1,fp8,fp8,0,0.1030351996421814
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,1,128,1,float16,fp8,0,0.09947519898414611
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,1,128,1,fp8,fp8,0,0.10078239440917969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,2,128,1,float16,float16,0,0.08050240278244018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,2,128,1,fp8,fp8,0,0.10024640560150147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,4,128,1,float16,float16,0,0.09055200219154358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,4,128,1,float16,fp8,0,0.10079200267791748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,4,128,1,fp8,fp8,0,0.10068960189819336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,8,128,1,float16,float16,0,0.06217920184135437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,8,128,1,float16,fp8,0,0.056443202495574954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,8,128,1,fp8,fp8,0,0.05586400032043457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,1,128,1,float16,float16,0,0.04527199864387512
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,1,128,1,float16,fp8,0,0.055524802207946776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,1,128,1,fp8,fp8,0,0.05561760067939758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,2,128,1,float16,float16,0,0.045531201362609866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,2,128,1,fp8,fp8,0,0.022944000363349915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,2,128,1,float16,fp8,0,0.05547999739646912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,2,128,1,fp8,fp8,0,0.055553597211837766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,4,128,1,float16,float16,0,0.05141760110855102
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,4,128,1,float16,fp8,0,0.055553597211837766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,4,128,1,fp8,fp8,0,0.0555728018283844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,8,128,1,float16,float16,0,0.03707680106163025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,1,128,1,float16,float16,0,0.08046720027923585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,8,128,1,fp8,fp8,0,0.03319199979305267
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,8,128,1,float16,fp8,0,0.03319680094718933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,1,128,1,float16,float16,0,0.028923198580741882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,1,128,1,float16,fp8,0,0.03319680094718933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,1,128,1,fp8,fp8,0,0.033032000064849854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,2,128,1,float16,float16,0,0.02909280061721802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,2,128,1,float16,fp8,0,0.033025598526000975
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,2,128,1,float16,fp8,0,0.1008255958557129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,4,128,1,float16,float16,0,0.030976000428199767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,4,128,1,float16,fp8,0,0.03499360084533691
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,4,128,1,fp8,fp8,0,0.033073601126670835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,8,128,1,float16,float16,0,0.02308479994535446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,8,128,1,fp8,fp8,0,0.021347199380397797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,1,128,1,float16,float16,0,0.019006399810314177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,1,128,1,float16,fp8,0,0.021089600026607515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,1,128,1,fp8,fp8,0,0.020947200059890748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,2,128,1,float16,float16,0,0.019012799859046935
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,2,128,1,float16,fp8,0,0.021049599349498748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,2,128,1,fp8,fp8,0,0.021147200465202333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,4,128,1,float16,float16,0,0.02065120041370392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,4,128,1,float16,fp8,0,0.021294400095939636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,4,128,1,fp8,fp8,0,0.02110559940338135
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,8,128,1,float16,float16,0,0.018878400325775146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,8,128,1,float16,fp8,0,0.018900799751281738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,8,128,1,fp8,fp8,0,0.018904000520706177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,1,128,1,float16,float16,0,0.017254400253295898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,1,128,1,float16,fp8,0,0.01881760060787201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,2,128,1,fp8,fp8,0,0.03497599959373474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,1,128,1,fp8,fp8,0,0.018892799317836762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,2,128,1,float16,float16,0,0.016908800601959227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,2,128,1,float16,fp8,0,0.018772800266742707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,8,128,1,float16,fp8,0,0.020817600190639496
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,4,128,1,float16,float16,0,0.01863040030002594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,2,128,1,fp8,fp8,0,0.018859200179576874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,4,128,1,float16,fp8,0,0.018705600500106813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,4,128,1,fp8,fp8,0,0.01889120042324066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,8,128,1,float16,fp8,0,0.016673600673675536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,1,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,8,128,1,fp8,fp8,0,0.016832000017166136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,1,128,1,float16,fp8,0,0.01664479970932007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,2,128,1,float16,float16,0,0.01666080057621002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,1,128,1,fp8,fp8,0,0.0169855996966362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,2,128,1,float16,fp8,0,0.016633599996566772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,4,128,1,float16,float16,0,0.016707199811935424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,2,128,1,fp8,fp8,0,0.016785599291324615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,4,128,1,float16,fp8,0,0.01666879951953888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,4,128,1,fp8,fp8,0,0.017107200622558594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,8,128,1,float16,float16,0,0.01669919937849045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,8,128,1,float16,fp8,0,0.016892799735069276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,1,128,1,float16,fp8,0,0.016734400391578676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,1,128,1,float16,float16,0,0.016859200596809388
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,1,128,1,fp8,fp8,0,0.016761599481105803
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,2,128,1,float16,float16,0,0.01684959977865219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,2,128,1,float16,fp8,0,0.016809600591659545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,2,128,1,fp8,fp8,0,0.016847999393939973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,4,128,1,float16,float16,0,0.01669600009918213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,4,128,1,float16,fp8,0,0.01671680063009262
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,4,128,1,fp8,fp8,0,0.016758400201797485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,1,128,1,float16,float16,0,0.11945760250091553
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,1,128,1,float16,fp8,0,0.16643040180206298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,1,128,1,fp8,fp8,0,0.16471680402755737
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,2,128,1,float16,float16,0,0.1213263988494873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,2,128,1,float16,fp8,0,0.16498719453811644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,8,128,1,float16,float16,0,0.018680000305175783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,4,128,1,float16,float16,0,0.13995360136032103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,4,128,1,fp8,fp8,0,0.16458719968795776
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,8,128,1,float16,float16,0,0.09610720276832581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,8,128,1,float16,fp8,0,0.08644480109214783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,8,128,1,fp8,fp8,0,0.08640159964561463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,1,128,1,float16,float16,0,0.06481760144233703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,8,128,1,fp8,fp8,0,0.016631999611854555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,1,128,1,float16,fp8,0,0.08509600162506104
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,1,128,1,fp8,fp8,0,0.08608480095863343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,2,128,1,float16,fp8,0,0.08617920279502869
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,2,128,1,fp8,fp8,0,0.08626719713211059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,4,128,1,float16,float16,0,0.07623040080070495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,4,128,1,float16,fp8,0,0.08624160289764404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,4,128,1,fp8,fp8,0,0.08632640242576599
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,8,128,1,float16,float16,0,0.05558239817619324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,8,128,1,float16,fp8,0,0.04954879879951477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,8,128,1,fp8,fp8,0,0.05044639706611633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,2,128,1,fp8,fp8,0,0.16629279851913453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,1,128,1,float16,float16,0,0.03729279935359955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,1,128,1,fp8,fp8,0,0.04939039945602417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,4,128,1,float16,fp8,0,0.16611520051956177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,2,128,1,float16,float16,0,0.039273598790168764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,2,128,1,float16,fp8,0,0.049369600415229795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,4,128,1,float16,float16,0,0.04537599980831146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,4,128,1,float16,fp8,0,0.04941920042037964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,4,128,1,fp8,fp8,0,0.04996159970760346
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,8,128,1,float16,float16,0,0.031052801012992858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,8,128,1,float16,fp8,0,0.028942400217056276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,2,128,1,float16,float16,0,0.06561920046806335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,8,128,1,fp8,fp8,0,0.02895039916038513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,1,128,1,float16,float16,0,0.022763200104236603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,1,128,1,float16,fp8,0,0.02900800108909607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,1,128,1,fp8,fp8,0,0.02887679934501648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,2,128,1,float16,float16,0,0.022913600504398345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,2,128,1,float16,fp8,0,0.02887679934501648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,2,128,1,fp8,fp8,0,0.028951999545097352
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,4,128,1,float16,float16,0,0.02478879988193512
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,4,128,1,float16,fp8,0,0.028935998678207397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,4,128,1,fp8,fp8,0,0.02890399992465973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,8,128,1,float16,float16,0,0.01889120042324066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,8,128,1,float16,fp8,0,0.018694399297237395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,8,128,1,fp8,fp8,0,0.01873600035905838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,1,128,1,float16,fp8,0,0.049537599086761475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,1,128,1,float16,float16,0,0.014689600467681885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,1,128,1,float16,fp8,0,0.018848000466823576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,2,128,1,float16,float16,0,0.01664000004529953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,1,128,1,fp8,fp8,0,0.018611200153827667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,2,128,1,float16,fp8,0,0.0188511997461319
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,4,128,1,float16,float16,0,0.016809600591659545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,2,128,1,fp8,fp8,0,0.018743999302387238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,4,128,1,float16,fp8,0,0.01886080056428909
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,4,128,1,fp8,fp8,0,0.0186831995844841
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,8,128,1,float16,float16,0,0.01653439998626709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,8,128,1,fp8,fp8,0,0.016599999368190767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,8,128,1,float16,fp8,0,0.015969599783420562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,1,128,1,float16,float16,0,0.01459999978542328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,1,128,1,float16,fp8,0,0.014720000326633453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,1,128,1,fp8,fp8,0,0.016473600268363954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,2,128,1,float16,fp8,0,0.01664319932460785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,2,128,1,float16,float16,0,0.014644800126552582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,2,128,1,fp8,fp8,0,0.016542400419712066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,4,128,1,float16,float16,0,0.014681600034236908
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,4,128,1,float16,fp8,0,0.016638399660587312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,8,128,1,float16,float16,0,0.014659200608730317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,4,128,1,fp8,fp8,0,0.016625599563121797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,8,128,1,float16,fp8,0,0.014713600277900696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,8,128,1,fp8,fp8,0,0.01472959965467453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,1,128,1,float16,float16,0,0.01451359987258911
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,1,128,1,float16,fp8,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,2,128,1,float16,float16,0,0.014745600521564484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,2,128,1,float16,fp8,0,0.014686399698257446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,2,128,1,fp8,fp8,0,0.0146479994058609
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,4,128,1,float16,float16,0,0.014694400131702423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,2,128,1,fp8,fp8,0,0.049435201287269595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,4,128,1,float16,fp8,0,0.014716799557209014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,4,128,1,fp8,fp8,0,0.014643199741840363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,8,128,1,float16,float16,0,0.01467680037021637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,8,128,1,float16,fp8,0,0.014638400077819825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,8,128,1,fp8,fp8,0,0.014737600088119506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,1,128,1,float16,float16,0,0.014584000408649444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,1,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,1,128,1,fp8,fp8,0,0.014212800562381745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,2,128,1,float16,fp8,0,0.014640000462532044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,2,128,1,fp8,fp8,0,0.014528000354766845
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,4,128,1,float16,float16,0,0.013356800377368926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,4,128,1,float16,fp8,0,0.014416000247001648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,4,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,8,128,1,float16,float16,0,0.01449120044708252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,8,128,1,float16,fp8,0,0.012579199671745301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,8,128,1,fp8,fp8,0,0.015033599734306336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,1,128,1,float16,fp8,0,0.013665600121021271
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,1,128,1,fp8,fp8,0,0.013359999656677246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,2,128,1,float16,float16,0,0.014574399590492249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,2,128,1,float16,fp8,0,0.01451680064201355
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,2,128,1,fp8,fp8,0,0.01449279934167862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,4,128,1,float16,float16,0,0.014480000734329224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,4,128,1,float16,fp8,0,0.014455999433994293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,4,128,1,fp8,fp8,0,0.014548799395561219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,1,128,1,float16,float16,0,0.07518399953842163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,1,128,1,float16,fp8,0,0.09754400253295899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,1,128,1,fp8,fp8,0,0.09734879732131958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,2,128,1,float16,float16,0,0.07549120187759399
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,2,128,1,float16,fp8,0,0.09705280065536499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,2,128,1,float16,float16,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,4,128,1,float16,float16,0,0.08446400165557862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,4,128,1,float16,fp8,0,0.09660639762878417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,1,128,1,float16,float16,0,0.012668800354003907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,4,128,1,fp8,fp8,0,0.09695519804954529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,8,128,1,float16,float16,0,0.05576159954071045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,8,128,1,fp8,fp8,0,0.053339201211929324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,1,128,1,float16,float16,0,0.040777599811553954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,1,128,1,float16,fp8,0,0.05140320062637329
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,1,128,1,fp8,fp8,0,0.014659200608730317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,1,128,1,fp8,fp8,0,0.05149120092391968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,2,128,1,float16,float16,0,0.041345599293708804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,2,128,1,float16,fp8,0,0.05139039754867554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,2,128,1,fp8,fp8,0,0.05151360034942627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,4,128,1,float16,float16,0,0.04729920029640198
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,4,128,1,float16,fp8,0,0.05144960284233093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,4,128,1,fp8,fp8,0,0.05144960284233093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,8,128,1,float16,float16,0,0.033011201024055484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,8,128,1,float16,fp8,0,0.03097119927406311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,8,128,1,fp8,fp8,0,0.0310479998588562
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,1,128,1,float16,float16,0,0.024899199604988098
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,1,128,1,float16,fp8,0,0.030910399556159974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,1,128,1,fp8,fp8,0,0.030907198786735535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,2,128,1,float16,float16,0,0.025033599138259886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,2,128,1,float16,fp8,0,0.03097119927406311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,2,128,1,fp8,fp8,0,0.031044799089431762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,4,128,1,float16,float16,0,0.026867198944091796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,4,128,1,float16,fp8,0,0.030985599756240843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,4,128,1,fp8,fp8,0,0.030947199463844298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,8,128,1,float16,float16,0,0.020712000131607056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,8,128,1,float16,fp8,0,0.01881760060787201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,2,128,1,fp8,fp8,0,0.0968559980392456
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,1,128,1,float16,float16,0,0.016764800250530242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,1,128,1,float16,fp8,0,0.020580799877643587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,1,128,1,fp8,fp8,0,0.020641599595546723
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,2,128,1,float16,float16,0,0.016755199432373045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,2,128,1,float16,fp8,0,0.01884160041809082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,8,128,1,float16,fp8,0,0.05353599786758423
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,2,128,1,fp8,fp8,0,0.018743999302387238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,4,128,1,float16,float16,0,0.016832000017166136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,4,128,1,float16,fp8,0,0.018878400325775146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,4,128,1,fp8,fp8,0,0.01879040002822876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,8,128,1,float16,float16,0,0.014561599493026734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,8,128,1,float16,fp8,0,0.013342399895191193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,8,128,1,fp8,fp8,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,1,128,1,float16,float16,0,0.012561599910259246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,1,128,1,float16,fp8,0,0.014601600170135499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,1,128,1,fp8,fp8,0,0.012630400061607362
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,2,128,1,float16,float16,0,0.012574400007724761
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,2,128,1,float16,fp8,0,0.012894399464130402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,2,128,1,fp8,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,4,128,1,float16,float16,0,0.01266240030527115
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,4,128,1,float16,fp8,0,0.014692799746990204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,8,128,1,float16,float16,0,0.012654399871826172
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,8,128,1,float16,fp8,0,0.012705600261688233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,8,128,1,fp8,fp8,0,0.01268479973077774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,1,128,1,float16,float16,0,0.01263359934091568
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,8,128,1,fp8,fp8,0,0.020656000077724456
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,1,128,1,float16,fp8,0,0.01268640011548996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,1,128,1,fp8,fp8,0,0.012572799623012543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,2,128,1,float16,float16,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,2,128,1,float16,fp8,0,0.012715199589729309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,2,128,1,fp8,fp8,0,0.012646399438381195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,4,128,1,float16,float16,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,4,128,1,float16,fp8,0,0.012656000256538392
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,8,128,1,float16,float16,0,0.010945600271224976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,8,128,1,float16,fp8,0,0.011564800143241882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,8,128,1,fp8,fp8,0,0.011507199704647064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,1,128,1,float16,float16,0,0.010644800215959548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,1,128,1,float16,fp8,0,0.010740800201892853
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,1,128,1,fp8,fp8,0,0.010667199641466141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,2,128,1,float16,float16,0,0.010657600313425063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,2,128,1,float16,fp8,0,0.012590399384498597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,2,128,1,fp8,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,4,128,1,fp8,fp8,0,0.012671999633312225
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,4,128,1,float16,float16,0,0.01072319969534874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,4,128,1,float16,fp8,0,0.01252799928188324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,8,128,1,float16,float16,0,0.010732799768447876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,8,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,8,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,1,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,1,128,1,float16,fp8,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,1,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,2,128,1,float16,float16,0,0.010622400045394897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,2,128,1,float16,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,2,128,1,fp8,fp8,0,0.010596799850463866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,4,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,4,128,1,fp8,fp8,0,0.010824000090360641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,8,128,1,float16,float16,0,0.010644800215959548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,4,128,1,fp8,fp8,0,0.012614400684833526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,8,128,1,float16,fp8,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,8,128,1,fp8,fp8,0,0.011048000305891037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,1,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,1,128,1,float16,fp8,0,0.010760000348091126
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,2,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,2,128,1,float16,fp8,0,0.01066880002617836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,2,128,1,fp8,fp8,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,4,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,4,128,1,float16,fp8,0,0.011083199828863143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,4,128,1,fp8,fp8,0,0.010763200372457505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,1,128,1,float16,float16,0,0.06000319719314575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,1,128,1,float16,fp8,0,0.06989279985427857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,1,128,1,fp8,fp8,0,0.06996480226516724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,2,128,1,float16,float16,0,0.061222398281097413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,2,128,1,float16,fp8,0,0.06986879706382751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,2,128,1,fp8,fp8,0,0.06991360187530518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,4,128,1,float16,float16,0,0.06607999801635742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,4,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,4,128,1,float16,fp8,0,0.07001919746398926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,4,128,1,fp8,fp8,0,0.06974719762802124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,8,128,1,float16,float16,0,0.04145439863204956
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,8,128,1,float16,fp8,0,0.03893760144710541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,8,128,1,fp8,fp8,0,0.039078399538993835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,1,128,1,float16,float16,0,0.033232000470161435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,1,128,1,float16,fp8,0,0.0391072005033493
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,1,128,1,fp8,fp8,0,0.03916000127792359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,2,128,1,float16,float16,0,0.034934398531913755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,2,128,1,float16,fp8,0,0.03830080032348633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,4,128,1,float16,float16,0,0.035211199522018434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,4,128,1,float16,fp8,0,0.03903520107269287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,4,128,1,fp8,fp8,0,0.03918400108814239
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,8,128,1,float16,float16,0,0.024868799746036528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,8,128,1,float16,fp8,0,0.025227200984954835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,8,128,1,fp8,fp8,0,0.024792000651359558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,1,128,1,float16,float16,0,0.022651199996471406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,1,128,1,float16,fp8,0,0.02481440007686615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,1,128,1,fp8,fp8,0,0.02475679963827133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,4,128,1,fp8,fp8,0,0.011107199639081956
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,2,128,1,float16,float16,0,0.021087999641895293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,2,128,1,fp8,fp8,0,0.024792000651359558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,4,128,1,float16,float16,0,0.02274080067873001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,1,128,1,fp8,fp8,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,4,128,1,float16,fp8,0,0.024846400320529937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,4,128,1,fp8,fp8,0,0.024903999269008638
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,8,128,1,float16,float16,0,0.016752000153064727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,8,128,1,float16,fp8,0,0.016752000153064727
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,8,128,1,fp8,fp8,0,0.016624000668525696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,1,128,1,float16,float16,0,0.014793600142002105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,1,128,1,float16,fp8,0,0.01661120057106018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,1,128,1,fp8,fp8,0,0.01659359931945801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,2,128,1,float16,fp8,0,0.024820800125598907
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,2,128,1,float16,float16,0,0.014689600467681885
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,2,128,1,float16,fp8,0,0.016715200245380403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,4,128,1,float16,float16,0,0.014764800667762756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,4,128,1,float16,fp8,0,0.016620799899101257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,4,128,1,fp8,fp8,0,0.016710400581359863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,8,128,1,float16,float16,0,0.012665599584579468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,8,128,1,float16,fp8,0,0.012580800056457519
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,8,128,1,fp8,fp8,0,0.012625600397586822
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,2,128,1,fp8,fp8,0,0.0392879992723465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,1,128,1,float16,fp8,0,0.012598399817943574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,1,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,2,128,1,float16,float16,0,0.012385600060224534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,2,128,1,float16,fp8,0,0.012574400007724761
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,4,128,1,float16,float16,0,0.012561599910259246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,2,128,1,fp8,fp8,0,0.012831999361515046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,4,128,1,float16,fp8,0,0.012511999905109405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,8,128,1,float16,float16,0,0.010835199803113937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,4,128,1,fp8,fp8,0,0.012796799838542938
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,8,128,1,fp8,fp8,0,0.011035200208425522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,1,128,1,float16,float16,0,0.01101119965314865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,2,128,1,fp8,fp8,0,0.016598400473594666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,1,128,1,float16,fp8,0,0.010972800105810166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,1,128,1,fp8,fp8,0,0.011187200248241425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,2,128,1,float16,float16,0,0.010814400017261505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,2,128,1,float16,fp8,0,0.011584000289440155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,2,128,1,fp8,fp8,0,0.010847999900579452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,4,128,1,float16,float16,0,0.010635200142860412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,4,128,1,fp8,fp8,0,0.011648000031709672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,8,128,1,float16,float16,0,0.010585600137710571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,8,128,1,float16,fp8,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,8,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,1,128,1,float16,float16,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,1,128,1,float16,float16,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,1,128,1,float16,fp8,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,1,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,2,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,2,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,4,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,4,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,4,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,8,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,8,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,8,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,1,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,1,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,1,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,2,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,2,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,4,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,4,128,1,float16,float16,0,0.010628800094127654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,4,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,4,128,1,fp8,fp8,0,0.010627199709415436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,8,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,8,128,1,float16,fp8,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,8,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,1,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,1,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,1,128,1,fp8,fp8,0,0.010625600069761276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,2,128,1,float16,fp8,0,0.010676799714565277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,2,128,1,fp8,fp8,0,0.010598400235176086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,4,128,1,float16,float16,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,4,128,1,float16,fp8,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,4,128,1,fp8,fp8,0,0.01071999967098236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,8,128,1,float16,fp8,0,0.011105599999427795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,1,128,1,float16,float16,0,0.051926398277282716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,1,128,1,float16,fp8,0,0.05571519732475281
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,1,128,1,fp8,fp8,0,0.05570240020751953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,2,128,1,float16,float16,0,0.05213760137557984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,2,128,1,float16,fp8,0,0.05564320087432861
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,2,128,1,fp8,fp8,0,0.055904000997543335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,4,128,1,float16,float16,0,0.05360640287399292
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,4,128,1,float16,fp8,0,0.05563520193099976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,4,128,1,fp8,fp8,0,0.055529600381851195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,8,128,1,float16,float16,0,0.03371520042419433
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,8,128,1,float16,fp8,0,0.031219199299812317
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,8,128,1,fp8,fp8,0,0.03291040062904358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,1,128,1,float16,float16,0,0.03081600069999695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,1,128,1,float16,fp8,0,0.033108800649642944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,1,128,1,fp8,fp8,0,0.031758400797843936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,2,128,1,float16,float16,0,0.03110400140285492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,2,128,1,float16,fp8,0,0.031012800335884095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,2,128,1,fp8,fp8,0,0.03301439881324768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,4,128,1,float16,float16,0,0.031040000915527343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,4,128,1,float16,fp8,0,0.032950401306152344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,4,128,1,fp8,fp8,0,0.03306080102920532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,8,128,1,float16,float16,0,0.021793599426746368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,8,128,1,float16,fp8,0,0.02078240066766739
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,8,128,1,fp8,fp8,0,0.02091040015220642
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,1,128,1,float16,float16,0,0.020761600136756896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,1,128,1,float16,fp8,0,0.02074880003929138
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,1,128,1,fp8,fp8,0,0.020819200575351714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,2,128,1,float16,float16,0,0.02078399956226349
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,2,128,1,float16,fp8,0,0.020777599513530733
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,2,128,1,fp8,fp8,0,0.02082560062408447
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,4,128,1,float16,float16,0,0.020865599811077117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,4,128,1,float16,fp8,0,0.020732800662517547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,4,128,1,fp8,fp8,0,0.02083040028810501
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,8,128,1,float16,float16,0,0.014703999459743499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,8,128,1,float16,fp8,0,0.014480000734329224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,8,128,1,fp8,fp8,0,0.014657600224018097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,1,128,1,float16,float16,0,0.014454400539398194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,1,128,1,float16,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,1,128,1,fp8,fp8,0,0.01467359960079193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,2,128,1,float16,float16,0,0.014614400267601014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,2,128,1,float16,fp8,0,0.014824000000953675
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,2,128,1,fp8,fp8,0,0.01456640064716339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,4,128,1,float16,float16,0,0.014620800316333771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,4,128,1,float16,fp8,0,0.014790399372577668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,4,128,1,fp8,fp8,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,8,128,1,float16,float16,0,0.011582399904727935
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,8,128,1,float16,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,8,128,1,fp8,fp8,0,0.01067200005054474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,1,128,1,float16,float16,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,1,128,1,float16,fp8,0,0.010675200074911118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,1,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,2,128,1,float16,float16,0,0.01061440035700798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,2,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,2,128,1,fp8,fp8,0,0.010691200196743012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,4,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,4,128,1,float16,fp8,0,0.010702399909496308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,4,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,8,128,1,float16,float16,0,0.010708799958229065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,8,128,1,float16,fp8,0,0.010552000254392624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,8,128,1,fp8,fp8,0,0.010636799782514573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,1,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,1,128,1,float16,fp8,0,0.010675200074911118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,2,128,1,float16,float16,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,2,128,1,float16,fp8,0,0.010729599744081497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,2,128,1,fp8,fp8,0,0.010694400221109391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,4,128,1,float16,float16,0,0.010860799998044967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,4,128,1,float16,fp8,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,4,128,1,fp8,fp8,0,0.010814400017261505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,8,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,8,128,1,float16,fp8,0,0.010838399827480315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,8,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,1,128,1,float16,float16,0,0.010753600299358368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,1,128,1,float16,fp8,0,0.010664000362157821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,1,128,1,fp8,fp8,0,0.01072319969534874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,2,128,1,float16,float16,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,2,128,1,float16,fp8,0,0.010636799782514573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,2,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,4,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,2,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,4,128,1,float16,fp8,0,0.010364799946546554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,4,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,8,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,8,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,1,128,1,float16,float16,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,1,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,2,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,2,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,2,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,4,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,4,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,4,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,8,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,8,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,8,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,1,128,1,float16,float16,0,0.010335999727249145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,1,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,1,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,2,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,1,128,1,fp8,fp8,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,4,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,4,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,4,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,1,128,1,float16,float16,0,0.04965600073337555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,1,128,1,float16,fp8,0,0.04978399872779846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,1,128,1,fp8,fp8,0,0.049667200446128844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,2,128,1,float16,float16,0,0.04954400062561035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,2,128,1,float16,fp8,0,0.05044320225715637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,8,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,2,128,1,fp8,fp8,0,0.05140640139579773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,4,128,1,float16,float16,0,0.051704001426696775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,4,128,1,float16,fp8,0,0.05138400197029114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,4,128,1,fp8,fp8,0,0.051534402370452884
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,8,128,1,float16,float16,0,0.03115679919719696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,8,128,1,float16,fp8,0,0.029209598898887634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,8,128,1,fp8,fp8,0,0.02908160090446472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,1,128,1,float16,float16,0,0.030721598863601686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,1,128,1,float16,fp8,0,0.028863999247550964
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,1,128,1,fp8,fp8,0,0.029068800806999206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,2,128,1,float16,float16,0,0.029281601309776306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,2,128,1,float16,fp8,0,0.028958401083946227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,2,128,1,fp8,fp8,0,0.02890079915523529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,4,128,1,float16,float16,0,0.031033599376678468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,4,128,1,float16,fp8,0,0.02901279926300049
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,4,128,1,fp8,fp8,0,0.02900159955024719
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,8,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,8,128,1,float16,fp8,0,0.0199072003364563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,8,128,1,fp8,fp8,0,0.01957920044660568
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,1,128,1,float16,float16,0,0.019099199771881105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,1,128,1,float16,fp8,0,0.02064799964427948
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,1,128,1,fp8,fp8,0,0.02064639925956726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,2,128,1,float16,float16,0,0.02083519995212555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,2,128,1,float16,fp8,0,0.020628799498081208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,2,128,1,fp8,fp8,0,0.020664000511169435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,4,128,1,float16,float16,0,0.020638400316238405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,4,128,1,float16,fp8,0,0.02073120027780533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,4,128,1,fp8,fp8,0,0.020667199790477753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,8,128,1,float16,float16,0,0.014720000326633453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,8,128,1,float16,fp8,0,0.014440000057220459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,8,128,1,fp8,fp8,0,0.014457599818706512
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,1,128,1,float16,float16,0,0.01454399973154068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,2,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,1,128,1,float16,fp8,0,0.014508800208568573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,1,128,1,fp8,fp8,0,0.01456640064716339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,2,128,1,float16,float16,0,0.014715200662612915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,2,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,2,128,1,float16,fp8,0,0.014129599928855896
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,4,128,1,float16,float16,0,0.014560000598430633
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,2,128,1,fp8,fp8,0,0.014732800424098969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,4,128,1,fp8,fp8,0,0.014630399644374847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,8,128,1,float16,float16,0,0.01072480008006096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,8,128,1,float16,fp8,0,0.010761599987745285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,8,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,1,128,1,float16,float16,0,0.010595200210809707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,1,128,1,float16,fp8,0,0.010676799714565277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,1,128,1,fp8,fp8,0,0.010675200074911118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,2,128,1,float16,float16,0,0.010667199641466141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,2,128,1,float16,fp8,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,2,128,1,fp8,fp8,0,0.010625600069761276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,4,128,1,float16,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,4,128,1,fp8,fp8,0,0.010655999928712846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,8,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,8,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,8,128,1,fp8,fp8,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,1,128,1,float16,float16,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,1,128,1,float16,fp8,0,0.010728000104427338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,2,128,1,float16,float16,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,1,128,1,fp8,fp8,0,0.010843200236558914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,2,128,1,float16,fp8,0,0.010681600123643876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,2,128,1,fp8,fp8,0,0.01072319969534874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,4,128,1,float16,float16,0,0.010675200074911118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,4,128,1,float16,fp8,0,0.010667199641466141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,4,128,1,float16,fp8,0,0.014473600685596466
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,4,128,1,fp8,fp8,0,0.01072319969534874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,8,128,1,float16,fp8,0,0.01064639985561371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,1,128,1,float16,float16,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,1,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,1,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,2,128,1,float16,float16,0,0.010351999849081039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,2,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,4,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,2,128,1,fp8,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,4,128,1,float16,float16,0,0.010633599758148194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,4,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,4,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,8,128,1,float16,float16,0,0.010552000254392624
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,8,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,1,128,1,float16,float16,0,0.010606399923563003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,1,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,1,128,1,fp8,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,2,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,2,128,1,float16,fp8,0,0.010713600367307664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,2,128,1,fp8,fp8,0,0.010331200063228607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,4,128,1,float16,float16,0,0.010564800351858139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,4,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,4,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,8,128,1,float16,float16,0,0.010636799782514573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,8,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,8,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,8,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,1,128,1,float16,fp8,0,0.010585600137710571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,1,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,2,128,1,float16,float16,0,0.01032480001449585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,2,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,4,128,1,float16,float16,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,4,128,1,float16,fp8,0,0.010286399722099304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,4,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,8,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,0,0.049449598789215087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,0,0.045484799146652224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,1,128,1,fp8,fp8,0,0.04555999934673309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,0,0.04561119973659515
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,2,128,1,fp8,fp8,0,0.04628959894180298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,0,0.04955199956893921
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,0,0.04535680115222931
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,4,128,1,fp8,fp8,0,0.045433598756790164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,0,0.02901119887828827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,0,0.027526399493217467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,8,128,1,fp8,fp8,0,0.02698880136013031
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,0,0.029023998975753786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,8,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,0,0.026956799626350402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,1,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,1,128,1,fp8,fp8,0,0.027000001072883605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,0,0.028990399837493897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,0,0.0270224004983902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,2,128,1,fp8,fp8,0,0.02694559991359711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,0,0.02909280061721802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,0,0.027046400308609008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,4,128,1,fp8,fp8,0,0.027081599831581114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,0,0.01926079988479614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,0,0.0195360004901886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,0,0.019361600279808044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,0,0.018825599551200868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,1,128,1,fp8,fp8,0,0.018908800184726716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,0,0.04945279955863953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,0,0.018911999464035035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,0,0.018680000305175783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,0,0.018833599984645844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,0,0.01865279972553253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,4,128,1,fp8,fp8,0,0.01871200054883957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,0,0.014617599546909332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,0,0.012939199805259705
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,8,128,1,fp8,fp8,0,0.01297920048236847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,0,0.014478400349617004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,0,0.014519999921321868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,1,128,1,fp8,fp8,0,0.014508800208568573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,0,0.014567999541759491
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,0,0.013715200126171112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,2,128,1,fp8,fp8,0,0.01276479959487915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,0,0.014606399834156037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,0,0.012833599746227265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,4,128,1,fp8,fp8,0,0.012804800271987915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,0,0.010979200154542923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,8,128,1,fp8,fp8,0,0.018798400461673737
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,8,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,0,0.012495999783277511
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,1,128,1,fp8,fp8,0,0.010665600001811982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,2,128,1,fp8,fp8,0,0.010623999685049058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,0,0.010983999818563461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,2,128,1,fp8,fp8,0,0.018854400515556334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,0,0.010635200142860412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,4,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,8,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,1,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,2,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,4,128,1,fp8,fp8,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,8,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,0,0.010628800094127654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,2,128,1,fp8,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,4,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,8,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,1,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,2,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,4,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,0,0.010582400113344192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,8,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,1,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,0,0.009777600318193436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,1,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,2,128,1,fp8,fp8,0,0.009967999905347824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,0,0.009644799679517747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,4,128,1,fp8,fp8,0,0.009911999851465226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,0,0.010583999752998351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,float16,0,1.665158462524414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,fp8,0,1.5042367935180665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,1,128,1,fp8,fp8,0,1.510148811340332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,fp8,0,1.5018303871154786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,float16,0,1.9349199295043946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,float16,0,0.9135087966918946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,fp8,0,0.8159760475158692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,4,128,1,fp8,fp8,0,0.8745375633239746
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,float16,0,0.8878928184509277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,fp8,0,0.8223952293395996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,1,128,1,fp8,fp8,0,0.8267151832580566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,float16,0,0.8811552047729492
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,fp8,0,0.8289440155029297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,float16,0,0.529475212097168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,2,128,1,fp8,fp8,0,0.8976079940795898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,2,128,1,fp8,fp8,0,1.5136287689208985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,fp8,0,0.48032479286193847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,4,128,1,fp8,fp8,0,0.47432799339294435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,float16,0,0.5196415901184082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,fp8,0,0.4818111896514893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,1,128,1,fp8,fp8,0,0.473908805847168
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,float16,0,0.5200863838195801
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,fp8,0,0.481496000289917
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,2,128,1,fp8,fp8,0,0.4743072032928467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,float16,0,0.3295808076858521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,fp8,0,0.3052560091018677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,4,128,1,fp8,fp8,0,0.29738080501556396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,float16,0,0.3231647968292236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,fp8,0,0.3028223991394043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,1,128,1,fp8,fp8,0,0.29920320510864257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,fp8,0,0.3011120080947876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,2,128,1,fp8,fp8,0,0.30365281105041503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,float16,0,0.9542736053466797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,fp8,0,0.9289728164672851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,1,128,1,fp8,fp8,0,0.9317008018493652
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,float16,0,0.9938511848449707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,fp8,0,0.9292143821716309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,2,128,1,fp8,fp8,0,0.928337574005127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,float16,0,0.5691264152526856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,fp8,0,0.5229680061340332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,float16,0,0.5486256122589112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,float16,0,0.31743199825286866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,fp8,0,0.5188672065734863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,1,128,1,fp8,fp8,0,0.5131984233856202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,float16,0,0.5447919845581055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,float16,0,0.3319200038909912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,2,128,1,fp8,fp8,0,0.5125408172607422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,fp8,0,0.3028255939483643
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,4,128,1,fp8,fp8,0,0.30273919105529784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,fp8,0,0.30310399532318116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,1,128,1,fp8,fp8,0,0.30155200958251954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,float16,0,0.31739680767059325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,fp8,0,0.30313920974731445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,2,128,1,fp8,fp8,0,0.302291202545166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,float16,0,0.20650079250335693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,fp8,0,0.5175583839416504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,fp8,0,0.1924944043159485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,4,128,1,fp8,fp8,0,0.195032000541687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,float16,0,0.20142879486083984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,fp8,0,0.1945695996284485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,1,128,1,fp8,fp8,0,0.19244320392608644
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,float16,0,0.2048975944519043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,fp8,0,0.19283039569854737
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,float16,0,0.31835041046142576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,float16,0,0.7004032135009766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,fp8,0,0.6856959819793701
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,4,128,1,fp8,fp8,0,0.5177792072296142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,1,128,1,fp8,fp8,0,0.6800559997558594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,float16,0,0.7102735996246338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,fp8,0,0.6866096019744873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,fp8,0,0.3906912088394165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,float16,0,0.4177264213562012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,2,128,1,fp8,fp8,0,0.6801328182220459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,4,128,1,fp8,fp8,0,0.38935840129852295
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,float16,0,0.4016287803649902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,fp8,0,0.3847520112991333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,1,128,1,fp8,fp8,0,0.39002399444580077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,float16,0,0.40193758010864256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,fp8,0,0.3858943939208984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,2,128,1,fp8,fp8,0,0.19423359632492065
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,2,128,1,fp8,fp8,0,0.3895967960357666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,float16,0,0.2522847890853882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,fp8,0,0.23672640323638916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,4,128,1,fp8,fp8,0,0.23827519416809081
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,float16,0,0.24644958972930908
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,1,128,1,fp8,fp8,0,0.23487839698791504
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,float16,0,0.24692320823669434
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,fp8,0,0.23865120410919188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,2,128,1,fp8,fp8,0,0.23633599281311035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,float16,0,0.1524880051612854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,fp8,0,0.1432960033416748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,4,128,1,fp8,fp8,0,0.14067519903182985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,fp8,0,0.14434720277786256
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,1,128,1,fp8,fp8,0,0.14151999950408936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,fp8,0,0.1414479970932007
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,2,128,1,fp8,fp8,0,0.14132959842681886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,float16,0,0.8690159797668457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,fp8,0,0.23604800701141357
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,fp8,0,0.9528767585754394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,float16,0,0.14882400035858154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,1,128,1,fp8,fp8,0,0.8795536041259766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,float16,0,0.8752096176147461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,float16,0,0.14833120107650757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,fp8,0,0.48076319694519043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,fp8,0,0.8832256317138671
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,2,128,1,fp8,fp8,0,0.8750176429748535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,4,128,1,fp8,fp8,0,0.4817039966583252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,float16,0,0.4918960094451904
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,fp8,0,0.47812957763671876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,1,128,1,fp8,fp8,0,0.48248801231384275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,float16,0,0.48556318283081057
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,fp8,0,0.48116159439086914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,2,128,1,fp8,fp8,0,0.47669758796691897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,float16,0,0.2956144094467163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,fp8,0,0.27584478855133054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,4,128,1,fp8,fp8,0,0.2780512094497681
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,float16,0,0.27863359451293945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,fp8,0,0.2778192043304443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,1,128,1,fp8,fp8,0,0.2747632026672363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,float16,0,0.2845072031021118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,float16,0,0.5968192100524903
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,fp8,0,0.2733392000198364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,2,128,1,fp8,fp8,0,0.2783040046691895
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,float16,0,0.1808351993560791
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,fp8,0,0.17245279550552367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,4,128,1,fp8,fp8,0,0.17365920543670654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,float16,0,0.17501599788665773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,fp8,0,0.17344319820404053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,1,128,1,fp8,fp8,0,0.173470401763916
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,float16,0,0.1738752007484436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,fp8,0,0.17253600358963012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,float16,0,0.11135519742965698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,fp8,0,0.10903359651565551
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,4,128,1,fp8,fp8,0,0.1087615966796875
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,float16,0,0.11004480123519897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,fp8,0,0.10908160209655762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,1,128,1,fp8,fp8,0,0.10882719755172729
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,float16,0,0.11003999710083008
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,2,128,1,fp8,fp8,0,0.10945440530776977
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,fp8,0,0.10851680040359497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,float16,0,0.5360288143157959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,1,128,1,fp8,fp8,0,0.5518784046173095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,float16,0,0.5360576152801514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,fp8,0,0.5596560001373291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,2,128,1,fp8,fp8,0,0.5516687870025635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,float16,0,0.3242016077041626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,4,128,1,fp8,fp8,0,0.31007359027862547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,float16,0,0.3067375898361206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,fp8,0,0.5545216083526612
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,fp8,0,0.30980160236358645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,1,128,1,fp8,fp8,0,0.3071712017059326
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,float16,0,0.3103408098220825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,fp8,0,0.306822395324707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,float16,0,0.18675199747085572
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,2,128,1,fp8,fp8,0,0.3098848104476929
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,fp8,0,0.1772752046585083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,2,128,1,fp8,fp8,0,0.17346400022506714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,4,128,1,fp8,fp8,0,0.1801344037055969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,float16,0,0.17580640316009521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,fp8,0,0.18139519691467285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,1,128,1,fp8,fp8,0,0.180350399017334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,fp8,0,0.30556960105895997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,float16,0,0.1767151951789856
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,fp8,0,0.18027039766311645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,float16,0,0.11460319757461548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,2,128,1,fp8,fp8,0,0.18101919889450074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,4,128,1,fp8,fp8,0,0.11318880319595337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,float16,0,0.11142719984054565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,fp8,0,0.1133504033088684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,1,128,1,fp8,fp8,0,0.11271840333938599
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,float16,0,0.11108640432357789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,fp8,0,0.11286400556564331
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,float16,0,0.08570240139961242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,fp8,0,0.08417119979858398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,float16,0,0.08412160277366638
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,fp8,0,0.08411999940872192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,1,128,1,fp8,fp8,0,0.08377439975738525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,float16,0,0.08441280126571656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,fp8,0,0.08422719836235046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,2,128,1,fp8,fp8,0,0.08441280126571656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,fp8,0,0.11146240234375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,float16,0,0.5126575946807861
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,fp8,0,0.5555263996124268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,2,128,1,fp8,fp8,0,0.11348639726638794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,1,128,1,fp8,fp8,0,0.5581408023834229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,4,128,1,fp8,fp8,0,0.08415200114250183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,fp8,0,0.5595935821533203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,2,128,1,fp8,fp8,0,0.5586575984954834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,float16,0,0.2951567888259888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,fp8,0,0.3033423900604248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,4,128,1,fp8,fp8,0,0.30305919647216795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,float16,0,0.2776079893112183
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,fp8,0,0.3045792102813721
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,1,128,1,fp8,fp8,0,0.30364160537719725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,fp8,0,0.30390880107879636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,2,128,1,fp8,fp8,0,0.30226080417633056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,float16,0,0.17331839799880983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,fp8,0,0.17159839868545532
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,4,128,1,fp8,fp8,0,0.17157119512557983
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,float16,0,0.16154559850692748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,fp8,0,0.17012959718704224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,float16,0,0.16344799995422363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,1,128,1,fp8,fp8,0,0.17095520496368408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,2,128,1,fp8,fp8,0,0.16965119838714598
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,float16,0,0.10711679458618165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,float16,0,0.5030896186828613
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,fp8,0,0.10480639934539795
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,4,128,1,fp8,fp8,0,0.10363520383834839
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,float16,0,0.10089919567108155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,fp8,0,0.10489280223846435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,1,128,1,fp8,fp8,0,0.10472160577774048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,float16,0,0.1006432056427002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,fp8,0,0.10488959550857543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,2,128,1,fp8,fp8,0,0.10541280508041381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,float16,0,0.06431199908256531
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,fp8,0,0.06481760144233703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,4,128,1,fp8,fp8,0,0.06533759832382202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,float16,0,0.0623744010925293
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,fp8,0,0.1722864031791687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,1,128,1,fp8,fp8,0,0.06564639806747437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,float16,0,0.06189759969711304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,fp8,0,0.064300799369812
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,2,128,1,fp8,fp8,0,0.06503679752349853
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,float16,0,0.059935998916625974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,fp8,0,0.05973920226097107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,float16,0,0.2778480052947998
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,4,128,1,fp8,fp8,0,0.05974400043487549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,fp8,0,0.059747201204299924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,float16,0,0.05781919956207275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,1,128,1,fp8,fp8,0,0.05976639986038208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,float16,0,0.05816799998283386
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,fp8,0,0.05984640121459961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,float16,0,0.3231744050979614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,fp8,0,0.36771841049194337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,fp8,0,0.06591839790344238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,1,128,1,fp8,fp8,0,0.3679856061935425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,float16,0,0.3163984060287476
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,2,128,1,fp8,fp8,0,0.3683648109436035
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,float16,0,0.1924191951751709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,fp8,0,0.20064160823822022
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,4,128,1,fp8,fp8,0,0.1989840030670166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,float16,0,0.18007839918136598
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,fp8,0,0.19931360483169555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,1,128,1,fp8,fp8,0,0.20083041191101075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,float16,0,0.17844640016555785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,fp8,0,0.2006608009338379
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,2,128,1,fp8,fp8,0,0.05963839888572693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,2,128,1,fp8,fp8,0,0.19922399520874023
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,float16,0,0.11372159719467163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,fp8,0,0.11328799724578857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,4,128,1,fp8,fp8,0,0.11201280355453491
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,float16,0,0.10314079523086547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,1,128,1,fp8,fp8,0,0.11289440393447876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,float16,0,0.104038405418396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,fp8,0,0.1122048020362854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,2,128,1,fp8,fp8,0,0.11402720212936401
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,float16,0,0.0690447986125946
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,fp8,0,0.06987199783325196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,4,128,1,fp8,fp8,0,0.069896000623703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,float16,0,0.06518399715423584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,1,128,1,fp8,fp8,0,0.06997759938240052
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,float16,0,0.06569920182228088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,fp8,0,0.06988160014152527
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,fp8,0,0.3684351921081543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,2,128,1,fp8,fp8,0,0.06999840140342713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,float16,0,0.05132480263710022
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,4,128,1,fp8,fp8,0,0.05151360034942627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,float16,0,0.04933600127696991
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,fp8,0,0.05147839784622192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,1,128,1,fp8,fp8,0,0.05146880149841308
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,float16,0,0.04876160025596619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,fp8,0,0.05151680111885071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,2,128,1,fp8,fp8,0,0.051412802934646604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,float16,0,0.04743840098381043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,fp8,0,0.047286400198936464
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,4,128,1,fp8,fp8,0,0.04741120040416717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,float16,0,0.04732159972190857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,fp8,0,0.047409600019454955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,1,128,1,fp8,fp8,0,0.04741599857807159
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,float16,0,0.04742400050163269
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,fp8,0,0.06986879706382751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,fp8,0,0.047356799244880676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,2,128,1,fp8,fp8,0,0.047228801250457766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,float16,0,0.31847519874572755
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,fp8,0,0.3895456075668335
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,fp8,0,0.051523202657699586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,float16,0,0.3102175951004028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,1,128,1,fp8,fp8,0,0.39384000301361083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,fp8,0,0.39227519035339353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,float16,0,0.19407520294189454
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,fp8,0,0.2111151933670044
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,2,128,1,fp8,fp8,0,0.38932640552520753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,4,128,1,fp8,fp8,0,0.2114176034927368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,float16,0,0.17414560317993164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,fp8,0,0.2077183961868286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,float16,0,0.17485599517822265
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,fp8,0,0.20739998817443847
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,fp8,0,0.11223039627075196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,2,128,1,fp8,fp8,0,0.20967040061950684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,fp8,0,0.11540800333023071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,4,128,1,fp8,fp8,0,0.11615999937057495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,float16,0,0.09529759883880615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,fp8,0,0.11446399688720703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,1,128,1,fp8,fp8,0,0.11290240287780762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,float16,0,0.09873759746551514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,fp8,0,0.11395360231399536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,float16,0,0.06579840183258057
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,fp8,0,0.06868799924850463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,1,128,1,fp8,fp8,0,0.21144800186157225
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,4,128,1,fp8,fp8,0,0.07008000016212464
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,float16,0,0.11032799482345582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,float16,0,0.06127039790153503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,1,128,1,fp8,fp8,0,0.07012320160865784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,float16,0,0.06167680025100708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,2,128,1,fp8,fp8,0,0.1146880030632019
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,fp8,0,0.06984800100326538
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,2,128,1,fp8,fp8,0,0.06865440011024475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,float16,0,0.041142401099205014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,fp8,0,0.043510401248931886
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,4,128,1,fp8,fp8,0,0.04328480064868927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,float16,0,0.03727520108222961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,fp8,0,0.04324800074100495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,float16,0,0.03739840090274811
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,1,128,1,fp8,fp8,0,0.04315359890460968
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,fp8,0,0.04302240014076233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,2,128,1,fp8,fp8,0,0.04304639995098114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,float16,0,0.03710240125656128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,fp8,0,0.03730080127716064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,4,128,1,fp8,fp8,0,0.037212800979614255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,float16,0,0.03524320125579834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,1,128,1,fp8,fp8,0,0.037169599533081056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,float16,0,0.03540160059928894
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,fp8,0,0.03728159964084625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,2,128,1,fp8,fp8,0,0.0372512012720108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,float16,0,0.03522239923477173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,fp8,0,0.03519200086593628
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,4,128,1,fp8,fp8,0,0.035102400183677676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,float16,0,0.03506399989128113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,fp8,0,0.035041600465774536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,1,128,1,fp8,fp8,0,0.03517119884490967
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,float16,0,0.03505600094795227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,fp8,0,0.03505760133266449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,2,128,1,fp8,fp8,0,0.03517279922962189
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,float16,0,0.20780160427093505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,fp8,0,0.2652431964874268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,1,128,1,fp8,fp8,0,0.26836159229278567
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,float16,0,0.2019711971282959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,fp8,0,0.26865758895874026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,2,128,1,fp8,fp8,0,0.26512799263000486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,float16,0,0.1303984045982361
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,fp8,0,0.14276479482650756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,fp8,0,0.03712320029735565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,4,128,1,fp8,fp8,0,0.14494080543518068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,float16,0,0.11182399988174438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,fp8,0,0.14275679588317872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,1,128,1,fp8,fp8,0,0.14301600456237792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,float16,0,0.11330879926681518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,fp8,0,0.14357600212097169
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,float16,0,0.0752560019493103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,2,128,1,fp8,fp8,0,0.1443120002746582
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,float16,0,0.06579359769821166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,fp8,0,0.08059999942779542
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,1,128,1,fp8,fp8,0,0.08026400208473206
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,float16,0,0.06707040071487427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,fp8,0,0.08041120171546937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,2,128,1,fp8,fp8,0,0.08034560084342957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,float16,0,0.04684639871120453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,fp8,0,0.04946399927139282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,4,128,1,fp8,fp8,0,0.049449598789215087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,float16,0,0.04333919882774353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,fp8,0,0.049435201287269595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,1,128,1,fp8,fp8,0,0.0494271993637085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,float16,0,0.04333600103855133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,fp8,0,0.049460801482200625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,2,128,1,fp8,fp8,0,0.049449598789215087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,float16,0,0.03285120129585266
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,fp8,0,0.03499200046062469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,fp8,0,0.08127999901771546
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,4,128,1,fp8,fp8,0,0.03496319949626923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,float16,0,0.031089600920677186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,fp8,0,0.03498879969120026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,1,128,1,fp8,fp8,0,0.03500959873199463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,float16,0,0.030976000428199767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,fp8,0,0.035006400942802426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,2,128,1,fp8,fp8,0,0.03510560095310211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,float16,0,0.030849599838256837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,fp8,0,0.03094080090522766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,4,128,1,fp8,fp8,0,0.030878400802612303
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,float16,0,0.028918400406837463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,fp8,0,0.031033599376678468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,1,128,1,fp8,fp8,0,0.03091199994087219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,float16,0,0.028996801376342772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,fp8,0,0.031001600623130798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,2,128,1,fp8,fp8,0,0.030833598971366883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,float16,0,0.02903839945793152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,fp8,0,0.02892799973487854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,4,128,1,fp8,fp8,0,0.028836798667907716
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,float16,0,0.028937599062919615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,fp8,0,0.02884800136089325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,1,128,1,fp8,fp8,0,0.028963199257850646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,float16,0,0.0289247989654541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,fp8,0,0.029046401381492615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,2,128,1,fp8,fp8,0,0.028963199257850646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,float16,0,0.2192768096923828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,fp8,0,0.30652480125427245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,1,128,1,fp8,fp8,0,0.3062832117080688
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,float16,0,0.21662719249725343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,fp8,0,0.30655679702758787
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,2,128,1,fp8,fp8,0,0.3054464101791382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,4,128,1,fp8,fp8,0,0.08095520138740539
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,float16,0,0.13891680240631105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,fp8,0,0.16042239665985109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,4,128,1,fp8,fp8,0,0.16245280504226683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,float16,0,0.11606080532073974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,fp8,0,0.16171679496765137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,1,128,1,fp8,fp8,0,0.15970879793167114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,float16,0,0.1192255973815918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,fp8,0,0.16025760173797607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,2,128,1,fp8,fp8,0,0.16189759969711304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,float16,0,0.07797440290451049
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,fp8,0,0.0884335994720459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,4,128,1,fp8,fp8,0,0.0882095992565155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,float16,0,0.06594399809837341
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,fp8,0,0.08645600080490112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,1,128,1,fp8,fp8,0,0.08665599822998046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,float16,0,0.06742240190505981
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,fp8,0,0.08788959980010987
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,2,128,1,fp8,fp8,0,0.0869920015335083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,fp8,0,0.0513759970664978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,4,128,1,fp8,fp8,0,0.05147839784622192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,float16,0,0.041264000535011294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,fp8,0,0.05142880082130432
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,1,128,1,fp8,fp8,0,0.051419198513031006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,float16,0,0.041238400340080264
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,fp8,0,0.0686303973197937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,fp8,0,0.0514240026473999
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,float16,0,0.027859199047088622
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,fp8,0,0.030935999751091004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,4,128,1,fp8,fp8,0,0.03091840147972107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,float16,0,0.024873599410057068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,fp8,0,0.030910399556159974
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,1,128,1,fp8,fp8,0,0.03091840147972107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,float16,0,0.02481600046157837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,fp8,0,0.03094240128993988
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,2,128,1,fp8,fp8,0,0.0308896005153656
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,float16,0,0.024740800261497498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,fp8,0,0.02687999904155731
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,4,128,1,fp8,fp8,0,0.026844799518585205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,float16,0,0.04711999893188477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,float16,0,0.03933440148830414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,fp8,0,0.02674719989299774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,1,128,1,fp8,fp8,0,0.0250463992357254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,float16,0,0.022780799865722658
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,fp8,0,0.025390401482582092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,2,128,1,fp8,fp8,0,0.026785600185394286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,fp8,0,0.022767999768257143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,float16,0,0.02274080067873001
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,fp8,0,0.02316959947347641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,1,128,1,fp8,fp8,0,0.02316479980945587
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,float16,0,0.022752000391483305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,fp8,0,0.02370080053806305
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,2,128,1,fp8,fp8,0,0.05151680111885071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,2,128,1,fp8,fp8,0,0.02385119944810867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,float16,0,0.023582400381565095
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,fp8,0,0.02271360009908676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,4,128,1,fp8,fp8,0,0.02269120067358017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,fp8,0,0.02282080054283142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,1,128,1,fp8,fp8,0,0.02263360023498535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,float16,0,0.02277279943227768
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,fp8,0,0.022678400576114654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,2,128,1,fp8,fp8,0,0.02279839962720871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,1,128,1,float16,float16,0,0.16531200408935548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,1,128,1,float16,fp8,0,0.25897119045257566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,1,128,1,fp8,fp8,0,0.2594063997268677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,4,128,1,fp8,fp8,0,0.024689599871635437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,2,128,1,float16,float16,0,0.16528160572052003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,2,128,1,float16,fp8,0,0.26059200763702395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,4,128,1,float16,float16,0,0.11145119667053223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,4,128,1,float16,fp8,0,0.13759679794311525
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,float16,0,0.022750400006771088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,4,128,1,fp8,fp8,0,0.13761279582977295
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,1,128,1,float16,float16,0,0.08934720158576966
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,1,128,1,float16,fp8,0,0.13701119422912597
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,2,128,1,float16,float16,0,0.0910319983959198
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,2,128,1,float16,fp8,0,0.13716479539871215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,2,128,1,fp8,fp8,0,0.13725440502166747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,4,128,1,float16,float16,0,0.062299197912216185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,4,128,1,float16,fp8,0,0.07443680167198181
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,4,128,1,fp8,fp8,0,0.07441279888153077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,1,128,1,float16,float16,0,0.050342398881912234
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,1,128,1,float16,fp8,0,0.07350239753723145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,float16,0,0.022859199345111846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,2,128,1,float16,float16,0,0.052481597661972045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,2,128,1,fp8,fp8,0,0.2590912103652954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,2,128,1,float16,fp8,0,0.07345759868621826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,4,128,1,float16,float16,0,0.03743360042572021
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,4,128,1,float16,fp8,0,0.04352479875087738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,4,128,1,fp8,fp8,0,0.0435344010591507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,1,128,1,float16,float16,0,0.03139359951019287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,1,128,1,fp8,fp8,0,0.13512799739837647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,1,128,1,float16,fp8,0,0.04347040057182312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,1,128,1,fp8,fp8,0,0.04221439957618713
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,2,128,1,float16,float16,0,0.03126879930496216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,2,128,1,float16,fp8,0,0.042900800704956055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,4,128,1,float16,float16,0,0.02078240066766739
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,4,128,1,float16,fp8,0,0.024803200364112855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,4,128,1,fp8,fp8,0,0.025070399045944214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,1,128,1,float16,float16,0,0.018855999410152435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,1,128,1,fp8,fp8,0,0.073225599527359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,1,128,1,float16,fp8,0,0.026791998744010927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,1,128,1,fp8,fp8,0,0.02505280077457428
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,2,128,1,float16,float16,0,0.02040960043668747
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,2,128,1,fp8,fp8,0,0.07202399969100952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,2,128,1,float16,fp8,0,0.024935999512672426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,4,128,1,float16,float16,0,0.018848000466823576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,4,128,1,float16,fp8,0,0.02083519995212555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,4,128,1,fp8,fp8,0,0.020844799280166627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,1,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,1,128,1,float16,fp8,0,0.02099040001630783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,1,128,1,fp8,fp8,0,0.020878399908542632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,2,128,1,float16,float16,0,0.018774400651454925
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,2,128,1,float16,fp8,0,0.02093919962644577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,2,128,1,fp8,fp8,0,0.043459200859069826
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,2,128,1,fp8,fp8,0,0.02078080028295517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,4,128,1,float16,fp8,0,0.0186271995306015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,4,128,1,fp8,fp8,0,0.018779200315475465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,1,128,1,float16,float16,0,0.016790400445461272
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,1,128,1,float16,fp8,0,0.018787199258804323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,1,128,1,fp8,fp8,0,0.018670399487018586
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,2,128,1,float16,float16,0,0.01679839938879013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,2,128,1,float16,fp8,0,0.018692800402641298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,2,128,1,fp8,fp8,0,0.018654400110244752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,2,128,1,fp8,fp8,0,0.025224000215530396
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,4,128,1,float16,fp8,0,0.01679999977350235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,4,128,1,fp8,fp8,0,0.017638400197029114
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,1,128,1,float16,float16,0,0.016681599617004394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,1,128,1,float16,fp8,0,0.016641600430011748
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,2,128,1,float16,float16,0,0.01666080057621002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,1,128,1,fp8,fp8,0,0.017110399901866913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,2,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,4,128,1,float16,float16,0,0.016704000532627106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,4,128,1,float16,fp8,0,0.016728000342845918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,4,128,1,fp8,fp8,0,0.016651199758052827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,4,128,1,float16,float16,0,0.01865919977426529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,1,128,1,float16,float16,0,0.016704000532627106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,1,128,1,fp8,fp8,0,0.016755199432373045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,1,128,1,float16,fp8,0,0.016924799978733064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,2,128,1,float16,float16,0,0.016686399281024934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,2,128,1,float16,fp8,0,0.016915200650691985
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,2,128,1,fp8,fp8,0,0.016545599699020384
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,1,128,1,float16,float16,0,0.07562239766120911
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,1,128,1,float16,fp8,0,0.12126719951629639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,2,128,1,float16,float16,0,0.07475039958953858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,2,128,1,float16,fp8,0,0.12142239809036255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,2,128,1,fp8,fp8,0,0.12138400077819825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,4,128,1,float16,float16,0,0.05559999942779541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,4,128,1,float16,float16,0,0.016739200055599212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,4,128,1,float16,fp8,0,0.06784960031509399
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,4,128,1,fp8,fp8,0,0.06812160015106201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,1,128,1,float16,float16,0,0.0432671993970871
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,1,128,1,float16,fp8,0,0.06592159867286682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,1,128,1,fp8,fp8,0,0.06575040221214294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,2,128,1,float16,float16,0,0.04551199972629547
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,2,128,1,float16,fp8,0,0.06578879952430725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,4,128,1,float16,float16,0,0.030953601002693176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,2,128,1,fp8,fp8,0,0.017207999527454377
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,4,128,1,float16,fp8,0,0.03713119924068451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,4,128,1,fp8,fp8,0,0.03710240125656128
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,1,128,1,float16,fp8,0,0.03723680078983307
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,1,128,1,float16,float16,0,0.025209599733352663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,1,128,1,fp8,fp8,0,0.03712159991264343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,2,128,1,float16,float16,0,0.025176000595092774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,2,128,1,float16,fp8,0,0.03712159991264343
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,2,128,1,fp8,fp8,0,0.037217599153518674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,4,128,1,float16,float16,0,0.018739199638366698
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,4,128,1,float16,fp8,0,0.02279199957847595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,1,128,1,float16,float16,0,0.01671839952468872
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,1,128,1,fp8,fp8,0,0.13772159814834595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,1,128,1,float16,fp8,0,0.022916799783706664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,1,128,1,fp8,fp8,0,0.022864000499248506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,2,128,1,float16,float16,0,0.016708800196647645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,2,128,1,fp8,fp8,0,0.022881600260734557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,4,128,1,float16,float16,0,0.01658080071210861
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,4,128,1,float16,fp8,0,0.018782399594783783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,4,128,1,fp8,fp8,0,0.018718400597572328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,1,128,1,float16,float16,0,0.014726400375366211
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,1,128,1,float16,fp8,0,0.018772800266742707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,2,128,1,fp8,fp8,0,0.06756319999694824
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,1,128,1,fp8,fp8,0,0.01870879977941513
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,2,128,1,float16,float16,0,0.014646400511264802
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,2,128,1,float16,fp8,0,0.018855999410152435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,2,128,1,fp8,fp8,0,0.01860959976911545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,4,128,1,float16,float16,0,0.014735999703407287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,4,128,1,fp8,fp8,0,0.01672479957342148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,1,128,1,float16,float16,0,0.014616000652313232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,1,128,1,float16,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,1,128,1,fp8,fp8,0,0.014713600277900696
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,4,128,1,fp8,fp8,0,0.02271520048379898
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,2,128,1,float16,fp8,0,0.015398399531841278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,2,128,1,fp8,fp8,0,0.014584000408649444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,4,128,1,float16,float16,0,0.014553600549697876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,2,128,1,float16,fp8,0,0.022860799729824067
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,4,128,1,float16,fp8,0,0.014531199634075165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,4,128,1,fp8,fp8,0,0.014641599357128143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,1,128,1,float16,float16,0,0.012566399574279786
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,1,128,1,float16,fp8,0,0.014657600224018097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,1,128,1,fp8,fp8,0,0.014617599546909332
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,2,128,1,float16,float16,0,0.014508800208568573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,2,128,1,float16,fp8,0,0.014555199444293976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,2,128,1,fp8,fp8,0,0.014532800018787383
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,4,128,1,float16,float16,0,0.014486399292945863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,4,128,1,float16,fp8,0,0.014561599493026734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,4,128,1,fp8,fp8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,1,128,1,float16,float16,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,1,128,1,float16,fp8,0,0.012836800515651703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,1,128,1,fp8,fp8,0,0.013191999495029449
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,4,128,1,float16,fp8,0,0.016476799547672272
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,2,128,1,float16,float16,0,0.012571200728416443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,2,128,1,float16,fp8,0,0.014484800398349762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,2,128,1,fp8,fp8,0,0.013643200695514678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,4,128,1,float16,float16,0,0.014422400295734406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,4,128,1,float16,fp8,0,0.01255200058221817
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,4,128,1,fp8,fp8,0,0.014398400485515595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,1,128,1,float16,float16,0,0.014427199959754944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,1,128,1,float16,fp8,0,0.013091200590133667
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,1,128,1,fp8,fp8,0,0.013254399597644805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,2,128,1,float16,float16,0,0.014494399726390838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,2,128,1,float16,fp8,0,0.014481599628925323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,2,128,1,fp8,fp8,0,0.014483200013637542
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,2,128,1,float16,float16,0,0.014575999975204468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,1,128,1,float16,float16,0,0.046275201439857486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,1,128,1,float16,fp8,0,0.06980800032615661
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,2,128,1,float16,float16,0,0.04737440049648285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,1,128,1,fp8,fp8,0,0.0681775987148285
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,2,128,1,float16,fp8,0,0.07003200054168701
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,2,128,1,fp8,fp8,0,0.069896000623703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,4,128,1,float16,fp8,0,0.039211198687553406
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,1,128,1,float16,float16,0,0.028388801217079162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,1,128,1,float16,fp8,0,0.03919999897480011
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,1,128,1,fp8,fp8,0,0.039208000898361205
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,2,128,1,float16,float16,0,0.02690559923648834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,2,128,1,float16,fp8,0,0.03923520147800445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,2,128,1,fp8,fp8,0,0.03915359973907471
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,4,128,1,float16,float16,0,0.020609599351882935
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,4,128,1,float16,fp8,0,0.024750399589538574
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,4,128,1,fp8,fp8,0,0.024758400022983552
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,1,128,1,float16,float16,0,0.018592000007629395
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,1,128,1,float16,fp8,0,0.02480800002813339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,1,128,1,fp8,fp8,0,0.024710400402545928
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,2,128,1,float16,float16,0,0.01860159933567047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,2,128,1,float16,fp8,0,0.02468640059232712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,2,128,1,fp8,fp8,0,0.024670399725437164
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,4,128,1,float16,float16,0,0.014590400457382201
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,4,128,1,float16,fp8,0,0.016527999937534333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,4,128,1,fp8,fp8,0,0.0165120005607605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,1,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,1,128,1,float16,float16,0,0.012966400384902954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,1,128,1,fp8,fp8,0,0.016463999450206757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,2,128,1,float16,fp8,0,0.016510400176048278
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,4,128,1,float16,float16,0,0.03322719931602478
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,2,128,1,fp8,fp8,0,0.016527999937534333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,4,128,1,fp8,fp8,0,0.039297598600387576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,4,128,1,float16,float16,0,0.012582400441169738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,4,128,1,float16,fp8,0,0.01260959953069687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,4,128,1,fp8,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,1,128,1,float16,float16,0,0.01148959994316101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,1,128,1,fp8,fp8,0,0.01252799928188324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,2,128,1,float16,float16,0,0.012457600235939026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,2,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,2,128,1,fp8,fp8,0,0.012647999823093415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,4,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,4,128,1,float16,fp8,0,0.012617599964141846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,1,128,1,float16,float16,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,1,128,1,float16,fp8,0,0.01252640038728714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,1,128,1,fp8,fp8,0,0.012580800056457519
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,2,128,1,float16,float16,0,0.01061599999666214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,2,128,1,float16,fp8,0,0.012627199292182922
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,2,128,1,fp8,fp8,0,0.01263200044631958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,4,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,4,128,1,float16,fp8,0,0.01063840016722679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,4,128,1,fp8,fp8,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,1,128,1,float16,float16,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,1,128,1,float16,fp8,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,1,128,1,fp8,fp8,0,0.010579200088977813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,2,128,1,float16,float16,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,2,128,1,float16,float16,0,0.012651200592517852
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,2,128,1,fp8,fp8,0,0.01266079992055893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,1,128,1,float16,fp8,0,0.013977600634098053
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,4,128,1,float16,fp8,0,0.010739199817180634
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,4,128,1,fp8,fp8,0,0.010713600367307664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,1,128,1,float16,float16,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,4,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,1,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,1,128,1,float16,fp8,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,2,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,2,128,1,float16,fp8,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,2,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,4,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,4,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,4,128,1,fp8,fp8,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,1,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,1,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,1,128,1,fp8,fp8,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,2,128,1,float16,float16,0,0.010622400045394897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,2,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,1,128,1,float16,float16,0,0.036504000425338745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,1,128,1,float16,fp8,0,0.04726879894733429
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,1,128,1,fp8,fp8,0,0.0472544014453888
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,2,128,1,float16,fp8,0,0.010913600027561188
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,2,128,1,float16,float16,0,0.03682399988174438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,4,128,1,float16,float16,0,0.011262399703264236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,2,128,1,float16,fp8,0,0.04742079973220825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,2,128,1,fp8,fp8,0,0.04771519899368286
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,4,128,1,float16,float16,0,0.025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,4,128,1,float16,fp8,0,0.0291375994682312
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,1,128,1,float16,float16,0,0.022915199398994446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,1,128,1,float16,fp8,0,0.02940160036087036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,1,128,1,fp8,fp8,0,0.029123198986053467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,2,128,1,float16,float16,0,0.023022399842739107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,2,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,2,128,1,float16,fp8,0,0.029203200340270997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,2,128,1,fp8,fp8,0,0.029017600417137145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,4,128,1,float16,fp8,0,0.018743999302387238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,4,128,1,fp8,fp8,0,0.018750399351119995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,1,128,1,float16,float16,0,0.01666080057621002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,1,128,1,float16,fp8,0,0.01884479969739914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,1,128,1,fp8,fp8,0,0.018692800402641298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,2,128,1,float16,float16,0,0.015894399583339693
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,2,128,1,float16,fp8,0,0.01865919977426529
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,2,128,1,fp8,fp8,0,0.018718400597572328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,4,128,1,float16,float16,0,0.012683199346065521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,4,128,1,fp8,fp8,0,0.029080000519752503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,4,128,1,float16,fp8,0,0.012811200320720672
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,4,128,1,fp8,fp8,0,0.012734399735927581
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,1,128,1,float16,fp8,0,0.012716799974441528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,1,128,1,fp8,fp8,0,0.012721599638462066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,2,128,1,float16,float16,0,0.01239359974861145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,2,128,1,float16,fp8,0,0.012624000012874604
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,2,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,4,128,1,float16,float16,0,0.01058719977736473
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,4,128,1,float16,fp8,0,0.012451200187206269
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,4,128,1,fp8,fp8,0,0.012582400441169738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,1,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,4,128,1,float16,float16,0,0.016766400635242464
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,1,128,1,fp8,fp8,0,0.01252640038728714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,2,128,1,float16,float16,0,0.01074720025062561
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,2,128,1,fp8,fp8,0,0.012569600343704223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,4,128,1,float16,float16,0,0.010764800012111664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,4,128,1,float16,fp8,0,0.010979200154542923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,4,128,1,fp8,fp8,0,0.01101600006222725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,1,128,1,float16,float16,0,0.010649599879980088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,1,128,1,float16,fp8,0,0.01069760024547577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,1,128,1,fp8,fp8,0,0.010751999914646149
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,2,128,1,float16,float16,0,0.010676799714565277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,2,128,1,float16,fp8,0,0.011057599633932113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,2,128,1,fp8,fp8,0,0.010908800363540649
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,4,128,1,float16,float16,0,0.01075040027499199
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,1,128,1,float16,float16,0,0.012417600303888322
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,4,128,1,fp8,fp8,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,1,128,1,float16,float16,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,1,128,1,float16,fp8,0,0.010654400289058685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,1,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,2,128,1,float16,float16,0,0.0106175996363163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,2,128,1,float16,fp8,0,0.010667199641466141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,1,128,1,float16,fp8,0,0.012646399438381195
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,2,128,1,float16,fp8,0,0.012476799637079239
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,4,128,1,float16,float16,0,0.010651200264692306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,4,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,4,128,1,fp8,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,1,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,1,128,1,float16,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,1,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,2,128,1,float16,float16,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,2,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,2,128,1,fp8,fp8,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,4,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,4,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,4,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,1,128,1,float16,float16,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,1,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,2,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,2,128,1,float16,fp8,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,2,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,1,128,1,float16,float16,0,0.031231999397277832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,1,128,1,float16,fp8,0,0.03708159923553467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,1,128,1,fp8,fp8,0,0.03707199990749359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,2,128,1,float16,float16,0,0.030943998694419862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,2,128,1,float16,fp8,0,0.03714880049228668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,2,128,1,fp8,fp8,0,0.03707039952278137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,4,128,1,float16,fp8,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,4,128,1,float16,fp8,0,0.022801600396633148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,4,128,1,fp8,fp8,0,0.023068800568580627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,1,128,1,float16,float16,0,0.02072640061378479
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,1,128,1,float16,fp8,0,0.023012800514698027
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,2,128,1,float16,float16,0,0.02098879963159561
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,2,128,1,float16,fp8,0,0.023108799755573273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,2,128,1,fp8,fp8,0,0.022912000119686127
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,4,128,1,float16,float16,0,0.014767999947071075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,4,128,1,float16,fp8,0,0.016633599996566772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,4,128,1,fp8,fp8,0,0.016521599888801575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,1,128,1,float16,float16,0,0.014620800316333771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,1,128,1,float16,fp8,0,0.016607999801635742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,1,128,1,fp8,fp8,0,0.016678400337696075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,2,128,1,float16,float16,0,0.014723199605941772
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,2,128,1,float16,fp8,0,0.01655679941177368
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,2,128,1,fp8,fp8,0,0.016571199893951415
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,4,128,1,float16,float16,0,0.011300799995660782
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,4,128,1,float16,float16,0,0.02080480009317398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,4,128,1,float16,fp8,0,0.012435200065374375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,4,128,1,fp8,fp8,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,1,128,1,fp8,fp8,0,0.022830399870872497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,1,128,1,float16,float16,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,1,128,1,float16,fp8,0,0.012435200065374375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,1,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,2,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,2,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,2,128,1,fp8,fp8,0,0.012544000148773193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,4,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,4,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,2,128,1,fp8,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,4,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,1,128,1,float16,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,1,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,2,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,2,128,1,float16,fp8,0,0.010623999685049058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,2,128,1,fp8,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,4,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,4,128,1,float16,fp8,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,4,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,1,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,1,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,1,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,2,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,2,128,1,float16,fp8,0,0.010302399843931198
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,2,128,1,fp8,fp8,0,0.010604800283908844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,4,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,4,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,4,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,1,128,1,float16,float16,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,1,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,1,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,2,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,2,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,2,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,4,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,4,128,1,float16,fp8,0,0.01029760017991066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,4,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,1,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,1,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,2,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,2,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,2,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,4,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,4,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,4,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,1,128,1,float16,fp8,0,0.01029599979519844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,2,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,2,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,2,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,1,128,1,float16,float16,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,1,128,1,float16,float16,0,0.03089759945869446
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,1,128,1,fp8,fp8,0,0.030961599946022034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,1,128,1,float16,fp8,0,0.030904000997543334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,2,128,1,float16,float16,0,0.030847999453544616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,2,128,1,float16,fp8,0,0.031007999181747438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,2,128,1,fp8,fp8,0,0.031079998612403868
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,4,128,1,float16,float16,0,0.020708799362182617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,4,128,1,fp8,fp8,0,0.02070239931344986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,1,128,1,float16,float16,0,0.019308799505233766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,4,128,1,float16,fp8,0,0.020848000049591066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,1,128,1,float16,fp8,0,0.020764799416065217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,1,128,1,fp8,fp8,0,0.020659199357032774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,2,128,1,float16,float16,0,0.020662400126457214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,2,128,1,float16,fp8,0,0.020852799713611602
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,2,128,1,fp8,fp8,0,0.020707200467586517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,4,128,1,float16,float16,0,0.014484800398349762
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,4,128,1,float16,fp8,0,0.014531199634075165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,4,128,1,fp8,fp8,0,0.014585599303245544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,1,128,1,float16,float16,0,0.014500799775123595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,1,128,1,float16,fp8,0,0.014641599357128143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,1,128,1,fp8,fp8,0,0.014494399726390838
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,2,128,1,float16,float16,0,0.014542399346828461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,2,128,1,float16,fp8,0,0.01459999978542328
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,2,128,1,fp8,fp8,0,0.014548799395561219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,4,128,1,float16,float16,0,0.010718400031328202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,4,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,1,128,1,float16,float16,0,0.011481600254774094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,1,128,1,fp8,fp8,0,0.011374399811029435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,2,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,2,128,1,float16,fp8,0,0.011870399862527848
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,2,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,4,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,4,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,4,128,1,fp8,fp8,0,0.010564800351858139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,1,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,1,128,1,float16,fp8,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,2,128,1,float16,float16,0,0.010572800040245056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,1,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,4,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,4,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,4,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,1,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,1,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,4,128,1,float16,fp8,0,0.012115199863910676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,1,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,2,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,2,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,2,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,4,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,1,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,4,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,4,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,2,128,1,float16,fp8,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,1,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,1,128,1,fp8,fp8,0,0.010623999685049058
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,2,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,2,128,1,float16,fp8,0,0.010567999631166457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,2,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,4,128,1,float16,float16,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,4,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,4,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,1,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,1,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,2,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,2,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,2,128,1,fp8,fp8,0,0.010585600137710571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,4,128,1,float16,float16,0,0.01061599999666214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,4,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,4,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,1,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,1,128,1,float16,fp8,0,0.010593599826097488
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,1,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,2,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,2,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,0,0.02911359965801239
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,0,0.026953598856925963
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,1,128,1,fp8,fp8,0,0.027025601267814635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,0,0.027091199159622194
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,1,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,2,128,1,fp8,fp8,0,0.027251198887825012
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,0,0.01870400011539459
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,0,0.018796800076961516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,4,128,1,fp8,fp8,0,0.018649600446224213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,0,0.01889760047197342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,0,0.018648000061511995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,1,128,1,fp8,fp8,0,0.01875839978456497
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,0,0.018875199556350707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,2,128,1,fp8,fp8,0,0.01868479996919632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,0,0.01462559998035431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,0,0.013064000010490417
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,4,128,1,fp8,fp8,0,0.01313440054655075
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,0,0.014593599736690522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,1,128,1,fp8,fp8,0,0.014567999541759491
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,1,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,0,0.014697599411010741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,0,0.014448000490665436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,2,128,1,fp8,fp8,0,0.014481599628925323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,0,0.010659199953079224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,0,0.02892799973487854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,4,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,0,0.010630399733781815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,1,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,2,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,4,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,1,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,0,0.010976000130176545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,0,0.010649599879980088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,2,128,1,fp8,fp8,0,0.010569600015878677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,0,0.011343999952077865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,4,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,1,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,0,0.010763200372457505
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,2,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,4,128,1,fp8,fp8,0,0.010667199641466141
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,1,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,0,0.010659199953079224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,2,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,4,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,0,0.010545600205659866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,1,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,2,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,0,0.018697600066661834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,4,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,1,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,2,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,0,0.012326399981975555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,float16,0,0.8847791671752929
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,fp8,0,0.8951472282409668
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16384,2,1,128,1,fp8,fp8,0,0.885643196105957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,float16,0,0.5283936023712158
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,fp8,0,0.5078864097595215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,2,128,1,fp8,fp8,0,0.514415979385376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,float16,0,0.5142079830169678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,fp8,0,0.511033582687378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,float16,0,0.3237152099609375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,1,128,1,fp8,fp8,0,0.5132351875305176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,fp8,0,0.3156303882598877
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,2,128,1,fp8,fp8,0,0.31605119705200196
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,fp8,0,0.32221279144287107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,1,128,1,fp8,fp8,0,0.31698880195617674
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,fp8,0,0.205131196975708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,2,128,1,fp8,fp8,0,0.2051408052444458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,float16,0,0.20823519229888915
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,fp8,0,0.2049855947494507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,1,128,1,fp8,fp8,0,0.2051919937133789
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,float16,0,0.5505951881408692
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,fp8,0,0.570959997177124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,float16,0,0.33259360790252684
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,12288,2,1,128,1,fp8,fp8,0,0.5644703865051269
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,fp8,0,0.33362560272216796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,2,128,1,fp8,fp8,0,0.3290303945541382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,fp8,0,0.3298991918563843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,1,128,1,fp8,fp8,0,0.32929120063781736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,float16,0,0.3273263931274414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,float16,0,0.20661599636077882
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,float16,0,0.2075455904006958
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,fp8,0,0.20688800811767577
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,2,128,1,fp8,fp8,0,0.20609440803527831
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,float16,0,0.207806396484375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,fp8,0,0.20820798873901367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,1,128,1,fp8,fp8,0,0.20609760284423828
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,float16,0,0.1536960005760193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,fp8,0,0.15407199859619142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,2,128,1,fp8,fp8,0,0.15419520139694215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,float16,0,0.1525887966156006
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,fp8,0,0.15503840446472167
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,float16,0,0.41084961891174315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,fp8,0,0.4332752227783203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,10240,2,1,128,1,fp8,fp8,0,0.4276735782623291
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,float16,0,0.2556063890457153
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,fp8,0,0.2575232028961182
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,2,128,1,fp8,fp8,0,0.25866720676422117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,float16,0,0.32922399044036865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,float16,0,0.24838879108428955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,fp8,0,0.25912160873413087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,float16,0,0.14858560562133788
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,1,128,1,fp8,fp8,0,0.26018080711364744
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,fp8,0,0.15454720258712767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,2,128,1,fp8,fp8,0,0.15199040174484252
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,float16,0,0.15194079875946045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,fp8,0,0.15458240509033203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,1,128,1,fp8,fp8,0,0.15321760177612304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,2,128,1,fp8,fp8,0,0.12929760217666625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,fp8,0,0.13114559650421143
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,float16,0,0.12758400440216064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,fp8,0,0.13130559921264648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,1,128,1,fp8,fp8,0,0.12932480573654176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,1,128,1,fp8,fp8,0,0.15481760501861572
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,float16,0,0.5084127902984619
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,fp8,0,0.548201608657837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,float16,0,0.29645280838012694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,8192,2,1,128,1,fp8,fp8,0,0.5458000183105469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,fp8,0,0.30983519554138184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,2,128,1,fp8,fp8,0,0.31191039085388184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,float16,0,0.2882591962814331
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,fp8,0,0.30939359664916993
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,float16,0,0.18093600273132324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,1,128,1,fp8,fp8,0,0.31221280097961424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,fp8,0,0.19080480337142944
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,2,128,1,fp8,fp8,0,0.18944159746170045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,float16,0,0.18259520530700685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,fp8,0,0.19055039882659913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,1,128,1,fp8,fp8,0,0.1899359941482544
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,fp8,0,0.11693760156631469
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,2,128,1,fp8,fp8,0,0.11772799491882324
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,float16,0,0.11270079612731934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,float16,0,0.13172800540924073
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,1,128,1,fp8,fp8,0,0.11778880357742309
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,float16,0,0.1067471981048584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,fp8,0,0.10567359924316407
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,2,128,1,fp8,fp8,0,0.1067952036857605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,float16,0,0.10467679500579834
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,fp8,0,0.1068943977355957
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,1,128,1,fp8,fp8,0,0.10472160577774048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,float16,0,0.32004799842834475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,fp8,0,0.36413600444793703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,6144,2,1,128,1,fp8,fp8,0,0.36262080669403074
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,float16,0,0.18957120180130005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,fp8,0,0.20681118965148926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,float16,0,0.1130031943321228
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,2,128,1,fp8,fp8,0,0.20710079669952391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,fp8,0,0.11734399795532227
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,float16,0,0.18760960102081298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,1,128,1,fp8,fp8,0,0.2043855905532837
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,float16,0,0.11789920330047607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,fp8,0,0.12526079416275024
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,2,128,1,fp8,fp8,0,0.12652000188827514
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,float16,0,0.115065598487854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,fp8,0,0.12643359899520873
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,float16,0,0.08595679998397827
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,fp8,0,0.09048159718513489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,2,128,1,fp8,fp8,0,0.09061440229415893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,float16,0,0.08644319772720337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,fp8,0,0.09077439904212951
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,1,128,1,fp8,fp8,0,0.09054880142211914
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,float16,0,0.08231520056724548
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,fp8,0,0.08233600258827209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,2,128,1,fp8,fp8,0,0.0826479971408844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,fp8,0,0.20905120372772218
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,fp8,0,0.08230239748954774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,float16,0,0.2957103967666626
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,1,128,1,fp8,fp8,0,0.1252560019493103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,2,1,128,1,fp8,fp8,0,0.3701312065124512
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,float16,0,0.17006880044937134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,fp8,0,0.20509920120239258
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,2,128,1,fp8,fp8,0,0.2051487922668457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,float16,0,0.16796159744262695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,fp8,0,0.20429279804229736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,float16,0,0.08238239884376526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,1,128,1,fp8,fp8,0,0.20446081161499025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,float16,0,0.1076464056968689
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,fp8,0,0.12108960151672363
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,2,128,1,fp8,fp8,0,0.12251839637756348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,float16,0,0.1028656005859375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,fp8,0,0.1227903962135315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,1,128,1,fp8,fp8,0,0.12101600170135499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,fp8,0,0.37095839977264405
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,fp8,0,0.07390559911727905
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,2,128,1,fp8,fp8,0,0.07380160093307495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,float16,0,0.06385279893875122
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,fp8,0,0.07353760004043579
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,1,128,1,fp8,fp8,0,0.07400959730148315
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,float16,0,0.059571200609207155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,fp8,0,0.06367679834365844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,2,128,1,fp8,fp8,0,0.06332160234451294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,float16,0,0.059520000219345094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,fp8,0,0.06308799982070923
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,1,128,1,fp8,fp8,0,0.06362879872322083
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,float16,0,0.05764639973640442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,fp8,0,0.05955359935760498
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,2,128,1,fp8,fp8,0,0.05767999887466431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,float16,0,0.05768160223960876
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,fp8,0,0.05766559839248657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,1,128,1,fp8,fp8,0,0.05957760214805603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,float16,0,0.1931615948677063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,fp8,0,0.25167040824890136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,float16,0,0.11167999505996704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,1,128,1,fp8,fp8,0,0.08423839807510376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,fp8,0,0.13964799642562867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,2,128,1,fp8,fp8,0,0.14092479944229125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,float16,0,0.10960479974746704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,1,128,1,fp8,fp8,0,0.1392367959022522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,fp8,0,0.14021919965744017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,float16,0,0.06796960234642029
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,float16,0,0.06489279866218567
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,2,128,1,fp8,fp8,0,0.0821344017982483
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,float16,0,0.07009599804878235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,fp8,0,0.08223999738693237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,1,128,1,fp8,fp8,0,0.0846127986907959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,float16,0,0.04946399927139282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,fp8,0,0.05781919956207275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,float16,0,0.05008640289306641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,fp8,0,0.05772320032119751
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,1,128,1,fp8,fp8,0,0.05766080021858215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,float16,0,0.04737600088119507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,fp8,0,0.05032640099525452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,2,128,1,fp8,fp8,0,0.0506384015083313
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,float16,0,0.04751040041446686
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,3072,2,1,128,1,fp8,fp8,0,0.25436160564422605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,fp8,0,0.049584001302719116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,1,128,1,fp8,fp8,0,0.05133919715881348
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,float16,0,0.04623680114746094
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,fp8,0,0.047295999526977536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,2,128,1,fp8,fp8,0,0.047444799542427064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,fp8,0,0.04730400145053863
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,1,128,1,fp8,fp8,0,0.0473904013633728
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,fp8,0,0.08383839726448059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,float16,0,0.19364160299301147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,fp8,0,0.2803231954574585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,float16,0,0.11007360219955445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,2048,2,1,128,1,fp8,fp8,0,0.2825648069381714
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,2,128,1,fp8,fp8,0,0.05753920078277588
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,fp8,0,0.15129439830780028
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,2,128,1,fp8,fp8,0,0.1539039969444275
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,float16,0,0.10617920160293579
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,fp8,0,0.14945919513702394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,1,128,1,fp8,fp8,0,0.1487504005432129
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,fp8,0,0.0862335979938507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,2,128,1,fp8,fp8,0,0.08624320030212403
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,float16,0,0.06422560214996338
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,fp8,0,0.08640319705009461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,1,128,1,fp8,fp8,0,0.0863759994506836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,float16,0,0.0393999993801117
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,fp8,0,0.05144960284233093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,2,128,1,fp8,fp8,0,0.05147839784622192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,float16,0,0.03925600051879883
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,fp8,0,0.05135040283203125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,1,128,1,fp8,fp8,0,0.051363199949264526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,float16,0,0.03712320029735565
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,fp8,0,0.041140800714492796
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,2,128,1,fp8,fp8,0,0.04124319851398468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,float16,0,0.03704479932785034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,fp8,0,0.04119200110435486
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,float16,0,0.06583840250968934
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,float16,0,0.035132798552513125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,fp8,0,0.037190398573875426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,2,128,1,fp8,fp8,0,0.03717280030250549
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,float16,0,0.03510079979896545
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,fp8,0,0.03718239963054657
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,1,128,1,fp8,fp8,0,0.03717919886112213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,float16,0,0.03498719930648804
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,fp8,0,0.0350959986448288
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,2,128,1,fp8,fp8,0,0.035087999701499936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,float16,0,0.034999999403953555
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,fp8,0,0.03504000008106232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,1,128,1,fp8,fp8,0,0.03503519892692566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,float16,0,0.12900320291519166
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,fp8,0,0.19673919677734375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1536,2,1,128,1,fp8,fp8,0,0.1973871946334839
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,float16,0,0.0765168011188507
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,fp8,0,0.1070207953453064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,2,128,1,fp8,fp8,0,0.10829919576644897
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,1,128,1,fp8,fp8,0,0.041294398903846743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,float16,0,0.07395520210266113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,fp8,0,0.10726560354232788
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,1,128,1,fp8,fp8,0,0.10676000118255616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,fp8,0,0.06285920143127441
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,float16,0,0.04647360146045685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,fp8,0,0.06303359866142273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,1,128,1,fp8,fp8,0,0.06301760077476501
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,float16,0,0.03289600014686585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,fp8,0,0.041126400232315063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,float16,0,0.04731839895248413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,2,128,1,fp8,fp8,0,0.04115839898586273
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,fp8,0,0.04114719927310943
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,float16,0,0.032979199290275575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,float16,0,0.028968000411987306
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,1,128,1,fp8,fp8,0,0.04116159975528717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,fp8,0,0.03298879861831665
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,2,128,1,fp8,fp8,0,0.03491199910640717
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,float16,0,0.028880000114440918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,fp8,0,0.03374080061912536
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,1,128,1,fp8,fp8,0,0.033055999875068666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,float16,0,0.028911998867988585
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,fp8,0,0.03091999888420105
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,2,128,1,fp8,fp8,0,0.030856001377105712
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,float16,0,0.02908160090446472
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,fp8,0,0.030985599756240843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,1,128,1,fp8,fp8,0,0.03089439868927002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,float16,0,0.0289247989654541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,2,128,1,fp8,fp8,0,0.029016000032424927
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,float16,0,0.028921601176261903
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,fp8,0,0.02908959984779358
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,float16,0,0.04640159904956818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,1,128,1,fp8,fp8,0,0.029020801186561584
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,2,128,1,fp8,fp8,0,0.06220960021018982
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,float16,0,0.13772640228271485
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,fp8,0,0.23260960578918458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1024,2,1,128,1,fp8,fp8,0,0.2299344062805176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,fp8,0,0.12465120553970337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,2,128,1,fp8,fp8,0,0.12301599979400635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,float16,0,0.07698720097541809
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,fp8,0,0.122489595413208
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,1,128,1,fp8,fp8,0,0.12315200567245484
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,fp8,0,0.06884639859199523
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,2,128,1,fp8,fp8,0,0.06828479766845703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,float16,0,0.045552000403404236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,fp8,0,0.06815999746322632
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,1,128,1,fp8,fp8,0,0.06872640252113342
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,float16,0,0.028748801350593566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,fp8,0,0.03924959897994995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,fp8,0,0.028944000601768494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,2,128,1,fp8,fp8,0,0.03925119936466217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,float16,0,0.027100801467895508
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,fp8,0,0.04120480120182037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,1,128,1,fp8,fp8,0,0.03912320137023926
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,fp8,0,0.030935999751091004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,2,128,1,fp8,fp8,0,0.030895999073982237
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,float16,0,0.02476799935102463
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,fp8,0,0.030976000428199767
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,float16,0,0.0779263973236084
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,1,128,1,fp8,fp8,0,0.03089439868927002
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,float16,0,0.023104000091552734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,fp8,0,0.026238399744033813
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,2,128,1,fp8,fp8,0,0.026872000098228453
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,float16,0,0.022777600586414336
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,fp8,0,0.026814401149749756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,float16,0,0.04642719924449921
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,1,128,1,fp8,fp8,0,0.026707199215888978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,float16,0,0.022731199860572815
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,fp8,0,0.02359199970960617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,2,128,1,fp8,fp8,0,0.022881600260734557
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,float16,0,0.02292799949645996
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,fp8,0,0.02279520034790039
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,1,128,1,fp8,fp8,0,0.024777600169181825
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,float16,0,0.022707200050354003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,2,128,1,fp8,fp8,0,0.022815999388694764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,float16,0,0.022771200537681578
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,fp8,0,0.02280000001192093
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,1,128,1,fp8,fp8,0,0.02284640073776245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,2,1,128,1,float16,float16,0,0.10873919725418091
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,float16,0,0.024831999838352204
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,2,1,128,1,float16,fp8,0,0.20568320751190186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,512,2,1,128,1,fp8,fp8,0,0.20820798873901367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,2,128,1,float16,fp8,0,0.11084799766540528
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,2,128,1,fp8,fp8,0,0.11061919927597046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,1,128,1,float16,float16,0,0.060791999101638794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,1,128,1,float16,fp8,0,0.10885599851608277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,1,128,1,fp8,fp8,0,0.1086959958076477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,2,128,1,float16,float16,0,0.03724479973316193
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,fp8,0,0.022864000499248506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,2,128,1,float16,fp8,0,0.05975040197372437
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,1,128,1,float16,float16,0,0.03533119857311249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,1,128,1,float16,fp8,0,0.05970720052719116
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,1,128,1,fp8,fp8,0,0.05973920226097107
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,2,128,1,float16,float16,0,0.022793599963188173
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,2,128,1,float16,fp8,0,0.03514559864997864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,1,128,1,float16,float16,0,0.022804799675941467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,1,128,1,float16,fp8,0,0.03497599959373474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,1,128,1,fp8,fp8,0,0.03512639999389648
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,2,128,1,float16,float16,0,0.06204800009727478
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,2,128,1,float16,fp8,0,0.02486560046672821
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,2,128,1,fp8,fp8,0,0.024833600223064422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,1,128,1,float16,float16,0,0.01860480010509491
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,1,128,1,float16,fp8,0,0.024780799448490144
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,1,128,1,fp8,fp8,0,0.024792000651359558
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,2,128,1,float16,float16,0,0.016654400527477263
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,2,128,1,float16,fp8,0,0.020793600380420683
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,2,128,1,fp8,fp8,0,0.02069920003414154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,1,128,1,float16,float16,0,0.016704000532627106
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,1,128,1,float16,fp8,0,0.020662400126457214
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,1,128,1,fp8,fp8,0,0.02066880017518997
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,2,128,1,fp8,fp8,0,0.0599295973777771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,2,128,1,float16,fp8,0,0.01857440024614334
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,2,128,1,fp8,fp8,0,0.018632000684738158
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,1,128,1,float16,float16,0,0.016628800332546233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,1,128,1,float16,fp8,0,0.0186256006360054
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,1,128,1,fp8,fp8,0,0.018513600528240203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,2,128,1,fp8,fp8,0,0.03510879874229431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,2,128,1,float16,float16,0,0.016527999937534333
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,2,128,1,float16,fp8,0,0.01669600009918213
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,2,128,1,fp8,fp8,0,0.016777600347995757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,1,128,1,float16,float16,0,0.016572800278663636
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,1,128,1,float16,fp8,0,0.016755199432373045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,1,128,1,fp8,fp8,0,0.016683200001716615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,2,128,1,float16,float16,0,0.016523200273513793
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,2,128,1,float16,float16,0,0.018780800700187682
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,2,128,1,float16,fp8,0,0.016539199650287627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,2,128,1,fp8,fp8,0,0.01668799966573715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,1,128,1,float16,float16,0,0.016764800250530242
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,1,128,1,float16,fp8,0,0.01669439971446991
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,2,1,128,1,float16,fp8,0,0.10216959714889526
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,2,1,128,1,float16,float16,0,0.053590399026870725
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,2,128,1,float16,float16,0,0.031067198514938353
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,256,2,1,128,1,fp8,fp8,0,0.10166560411453247
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,2,128,1,float16,fp8,0,0.05548959970474243
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,2,128,1,fp8,fp8,0,0.05540639758110046
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,1,128,1,float16,float16,0,0.02921440005302429
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,1,128,1,float16,fp8,0,0.05522879958152771
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,1,128,1,fp8,fp8,0,0.05554400086402893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,2,128,1,float16,float16,0,0.018721599876880646
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,2,128,1,float16,fp8,0,0.03301759958267212
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,1,128,1,float16,float16,0,0.018729600310325622
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,2,128,1,float16,float16,0,0.016612799465656282
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,1,128,1,float16,fp8,0,0.03287520110607147
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,1,128,1,fp8,fp8,0,0.030955201387405394
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,2,128,1,float16,float16,0,0.01666239947080612
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,2,128,1,float16,fp8,0,0.02272319942712784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,2,128,1,fp8,fp8,0,0.022780799865722658
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,1,128,1,float16,float16,0,0.01656160056591034
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,1,128,1,float16,fp8,0,0.022899200022220612
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,1,128,1,fp8,fp8,0,0.02276960015296936
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,2,128,1,float16,float16,0,0.014727999269962311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,2,128,1,float16,fp8,0,0.018622399866580965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,2,128,1,fp8,fp8,0,0.018648000061511995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,1,128,1,float16,float16,0,0.014548799395561219
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,1,128,1,float16,fp8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,1,128,1,fp8,fp8,0,0.018654400110244752
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,2,128,1,float16,float16,0,0.014476799964904785
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,2,128,1,float16,fp8,0,0.014873600006103516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,2,128,1,fp8,fp8,0,0.01579679995775223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,1,128,1,float16,float16,0,0.014521600306034088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,1,128,1,fp8,fp8,0,0.014567999541759491
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,1,128,1,fp8,fp8,0,0.01655520051717758
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,2,128,1,float16,float16,0,0.014579200744628906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,2,128,1,float16,fp8,0,0.014561599493026734
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,2,128,1,fp8,fp8,0,0.014550399780273438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,1,128,1,float16,float16,0,0.012600000202655792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,1,128,1,float16,fp8,0,0.014558400213718414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,1,128,1,fp8,fp8,0,0.01454399973154068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,2,128,1,float16,float16,0,0.012579199671745301
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,2,128,1,fp8,fp8,0,0.030980798602104186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,2,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,2,128,1,fp8,fp8,0,0.014584000408649444
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,1,128,1,float16,float16,0,0.012604799866676331
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,1,128,1,float16,fp8,0,0.013844799995422364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,1,128,1,fp8,fp8,0,0.013366399705410004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,2,128,1,float16,float16,0,0.012783999741077422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,2,128,1,float16,fp8,0,0.012582400441169738
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,2,128,1,fp8,fp8,0,0.012591999769210816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,1,128,1,float16,float16,0,0.013254399597644805
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,1,128,1,float16,fp8,0,0.014371199905872345
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,1,128,1,fp8,fp8,0,0.012681600451469422
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,2,1,128,1,float16,float16,0,0.033022400736808774
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,2,1,128,1,float16,fp8,0,0.0576416015625
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,128,2,1,128,1,fp8,fp8,0,0.05763840079307556
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,2,128,1,float16,float16,0,0.020839999616146087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,2,128,1,float16,fp8,0,0.03299039900302887
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,2,128,1,fp8,fp8,0,0.033073601126670835
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,1,128,1,float16,float16,0,0.018747200071811677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,1,128,1,float16,fp8,0,0.014838400483131408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,1,128,1,float16,fp8,0,0.03311040103435516
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,1,128,1,fp8,fp8,0,0.03312320113182068
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,2,128,1,float16,float16,0,0.014508800208568573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,2,128,1,float16,fp8,0,0.02096319943666458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,1,128,1,float16,float16,0,0.014569599926471711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,2,128,1,fp8,fp8,0,0.02080000042915344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,1,128,1,float16,fp8,0,0.020739200711250304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,1,128,1,fp8,fp8,0,0.020798400044441223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,2,128,1,float16,float16,0,0.012542399764060973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,2,128,1,fp8,fp8,0,0.016550399363040924
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,1,128,1,float16,float16,0,0.012548799812793731
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,1,128,1,float16,fp8,0,0.016521599888801575
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,1,128,1,fp8,fp8,0,0.015489600598812103
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,2,128,1,float16,float16,0,0.012470400333404541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,2,128,1,float16,fp8,0,0.012556800246238708
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,2,128,1,fp8,fp8,0,0.012665599584579468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,1,128,1,float16,float16,0,0.011686400324106217
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,1,128,1,float16,fp8,0,0.012603199481964112
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,1,128,1,fp8,fp8,0,0.012753599882125854
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,2,128,1,float16,float16,0,0.010599999874830245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,2,128,1,float16,fp8,0,0.012569600343704223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,2,128,1,fp8,fp8,0,0.012439999729394913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,1,128,1,float16,float16,0,0.010657600313425063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,1,128,1,float16,fp8,0,0.012868799269199371
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,1,128,1,fp8,fp8,0,0.012428800016641617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,2,128,1,float16,float16,0,0.010840000212192535
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,2,128,1,float16,fp8,0,0.0108255997300148
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,2,128,1,fp8,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,1,128,1,float16,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,1,128,1,fp8,fp8,0,0.012166400253772736
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,2,128,1,float16,float16,0,0.010824000090360641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,2,128,1,float16,fp8,0,0.010830400139093399
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,2,128,1,fp8,fp8,0,0.010819199681282043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,1,128,1,float16,float16,0,0.010784000158309937
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,1,128,1,float16,fp8,0,0.010590399801731109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,1,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,2,128,1,float16,float16,0,0.010713600367307664
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,2,128,1,float16,fp8,0,0.01056160032749176
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,1,128,1,float16,float16,0,0.010558400303125381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,1,128,1,float16,fp8,0,0.010745599865913391
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,1,128,1,fp8,fp8,0,0.010628800094127654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,2,1,128,1,float16,float16,0,0.024934400618076325
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,2,128,1,float16,fp8,0,0.014697599411010741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,2,1,128,1,float16,fp8,0,0.037145599722862244
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,64,2,1,128,1,fp8,fp8,0,0.037169599533081056
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,2,128,1,float16,float16,0,0.016734400391578676
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,2,128,1,float16,fp8,0,0.022812800109386445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,2,128,1,fp8,fp8,0,0.02295680046081543
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,1,128,1,float16,float16,0,0.016627199947834015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,1,128,1,float16,fp8,0,0.02290239930152893
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,2,128,1,float16,float16,0,0.012703999876976013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,2,128,1,float16,fp8,0,0.014883199334144592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,1,128,1,float16,float16,0,0.012564800679683685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,1,128,1,float16,fp8,0,0.01465120017528534
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,1,128,1,fp8,fp8,0,0.014769600331783294
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,2,128,1,float16,float16,0,0.01075040027499199
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,2,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,2,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,1,128,1,float16,float16,0,0.011156799644231797
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,1,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,1,128,1,fp8,fp8,0,0.012680000066757202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,2,128,1,fp8,fp8,0,0.010625600069761276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,2,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,2,128,1,float16,fp8,0,0.012518399953842163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,2,128,1,fp8,fp8,0,0.01244800016283989
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,1,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,1,128,1,float16,fp8,0,0.011975999921560287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,1,128,1,fp8,fp8,0,0.012452799826860428
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,2,128,1,float16,fp8,0,0.010726399719715118
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,2,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,1,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,1,128,1,float16,fp8,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,1,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,2,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,1,128,1,fp8,fp8,0,0.022732800245285033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,2,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,2,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,1,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,2,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,2,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,2,128,1,fp8,fp8,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,1,128,1,float16,float16,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,1,128,1,float16,fp8,0,0.010625600069761276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,1,128,1,fp8,fp8,0,0.011055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,2,128,1,float16,float16,0,0.010652799904346467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,2,128,1,float16,fp8,0,0.010649599879980088
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,2,128,1,fp8,fp8,0,0.010611200332641601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,1,128,1,float16,float16,0,0.010636799782514573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,1,128,1,float16,fp8,0,0.010583999752998351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,1,128,1,fp8,fp8,0,0.010620799660682679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,2,1,128,1,float16,float16,0,0.022614400088787078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,2,128,1,fp8,fp8,0,0.014699199795722961
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,2,1,128,1,float16,fp8,0,0.026958400011062623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,32,2,1,128,1,fp8,fp8,0,0.027107200026512145
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,2,128,1,float16,float16,0,0.01475680023431778
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,2,128,1,float16,fp8,0,0.018617600202560425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,2,128,1,fp8,fp8,0,0.018188799917697906
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,1,128,1,float16,float16,0,0.014655999839305878
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,1,128,1,fp8,fp8,0,0.018566399812698364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,2,128,1,float16,fp8,0,0.012665599584579468
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,2,128,1,float16,float16,0,0.012961600720882416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,2,128,1,fp8,fp8,0,0.012703999876976013
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,1,128,1,float16,fp8,0,0.012638400495052337
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,1,128,1,float16,float16,0,0.012569600343704223
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,1,128,1,fp8,fp8,0,0.012600000202655792
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,2,128,1,float16,fp8,0,0.011126399785280228
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,2,128,1,float16,float16,0,0.011751999706029892
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,2,128,1,fp8,fp8,0,0.011291199922561645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,1,128,1,float16,float16,0,0.010940799862146378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,1,128,1,float16,fp8,0,0.010926400125026704
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,2,128,1,float16,float16,0,0.010659199953079224
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,1,128,1,fp8,fp8,0,0.011398400366306304
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,2,128,1,float16,fp8,0,0.010654400289058685
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,1,128,1,float16,float16,0,0.01053759977221489
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,2,128,1,fp8,fp8,0,0.011006399989128113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,1,128,1,float16,fp8,0,0.010628800094127654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,1,128,1,fp8,fp8,0,0.01096159964799881
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,2,128,1,float16,float16,0,0.010673599690198899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,2,128,1,fp8,fp8,0,0.010673599690198899
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,1,128,1,float16,float16,0,0.010574399679899215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,1,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,1,128,1,fp8,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,2,128,1,float16,float16,0,0.010601600259542465
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,2,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,2,128,1,fp8,fp8,0,0.01063840016722679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,1,128,1,float16,float16,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,1,128,1,float16,fp8,0,0.010527999699115753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,1,128,1,float16,fp8,0,0.016979199647903443
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,1,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,2,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,2,128,1,fp8,fp8,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,1,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,1,128,1,float16,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,1,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,2,128,1,float16,float16,0,0.010577599704265594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,2,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,2,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,1,128,1,float16,float16,0,0.010527999699115753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,1,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,2,128,1,float16,fp8,0,0.010929600149393082
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,1,128,1,fp8,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,2,1,128,1,float16,float16,0,0.02078080028295517
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,2,1,128,1,fp8,fp8,0,0.022892799973487855
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,16,2,1,128,1,float16,fp8,0,0.022844800353050233
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,2,128,1,float16,float16,0,0.014582400023937226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,2,128,1,float16,fp8,0,0.014703999459743499
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,2,128,1,fp8,fp8,0,0.015216000378131866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,1,128,1,float16,float16,0,0.014612799882888794
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,1,128,1,fp8,fp8,0,0.014868800342082978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,2,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,2,128,1,float16,fp8,0,0.012999999523162841
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,2,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,2,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,1,128,1,float16,float16,0,0.011670400202274323
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,1,128,1,float16,fp8,0,0.012539200484752655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,1,128,1,fp8,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,2,128,1,float16,float16,0,0.010795199871063232
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,2,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,1,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,1,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,1,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,2,128,1,float16,float16,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,2,128,1,float16,fp8,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,2,128,1,fp8,fp8,0,0.010635200142860412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,1,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,1,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,1,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,1,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,2,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,2,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,2,128,1,fp8,fp8,0,0.010599999874830245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,1,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,1,128,1,float16,fp8,0,0.015199999511241912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,1,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,2,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,2,128,1,float16,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,2,128,1,fp8,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,1,128,1,float16,float16,0,0.010539200156927109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,1,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,2,128,1,float16,fp8,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,1,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,2,128,1,float16,float16,0,0.010596799850463866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,2,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,2,128,1,fp8,fp8,0,0.010639999806880952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,1,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,1,128,1,float16,fp8,0,0.010585600137710571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,2,128,1,float16,float16,0,0.01056319996714592
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,2,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,2,128,1,fp8,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,1,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,1,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,1,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,0,0.01968960016965866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,0,0.018848000466823576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,256,1,2,1,128,1,fp8,fp8,0,0.018743999302387238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,0,0.014596800506114959
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,0,0.013335999846458436
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,2,128,1,fp8,fp8,0,0.013307200372219085
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,1,128,1,fp8,fp8,0,0.010328000038862228
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,0,0.01451520025730133
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,0,0.013876800239086152
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,1,128,1,fp8,fp8,0,0.012916800379753113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,2,128,1,fp8,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,0,0.01061440035700798
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,1,128,1,fp8,fp8,0,0.010652799904346467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,0,0.010657600313425063
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,0,0.010608000308275222
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,2,128,1,fp8,fp8,0,0.010556799918413162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,1,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,0,0.010590399801731109
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,0,0.010547199845314026
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,1,128,1,fp8,fp8,0,0.010705599933862687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,0,0.010543999820947647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,2,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,1,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,2,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,0,0.010364799946546554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,1,128,1,fp8,fp8,0,0.010320000350475311
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,2,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,0,0.010715200006961823
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,1,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,0,0.010620799660682679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,2,128,1,fp8,fp8,0,0.010576000064611435
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,0,0.010532800108194351
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,2,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,0,0.010611200332641601
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,1,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,float16,0,0.3304816007614136
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,fp8,0,0.35025439262390134
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,float16,0,0.21362719535827637
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16384,1,1,128,1,fp8,fp8,0,0.35148799419403076
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,fp8,0,0.22202560901641846
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16384,1,1,128,1,fp8,fp8,0,0.2206576108932495
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,0,0.010606399923563003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,float16,0,0.19585599899291992
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,fp8,0,0.19914560317993163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16384,1,1,128,1,fp8,fp8,0,0.1989824056625366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,1,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,float16,0,0.21448960304260253
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,fp8,0,0.2311631917953491
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,12288,1,1,128,1,fp8,fp8,0,0.23298239707946777
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,float16,0,0.15941280126571655
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,fp8,0,0.1662783980369568
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,12288,1,1,128,1,fp8,fp8,0,0.16819039583206177
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,float16,0,0.1516592025756836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,fp8,0,0.15392160415649414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,12288,1,1,128,1,fp8,fp8,0,0.15212479829788209
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,float16,0,0.1537328004837036
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,1,1,128,1,fp8,fp8,0,0.1764623999595642
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,float16,0,0.13147200345993043
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,fp8,0,0.1415887951850891
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,float16,0,0.12555840015411376
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,fp8,0,0.1293887972831726
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,10240,1,1,128,1,fp8,fp8,0,0.12785120010375978
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,float16,0,0.18981759548187255
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,fp8,0,0.22377119064331055
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,8192,1,1,128,1,fp8,fp8,0,0.22399520874023438
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,float16,0,0.11725440025329589
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,fp8,0,0.1351855993270874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,8192,1,1,128,1,fp8,fp8,0,0.1353983998298645
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,float16,0,0.10896799564361573
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,fp8,0,0.17609920501708984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,fp8,0,0.1153439998626709
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,8192,1,1,128,1,fp8,fp8,0,0.11496479511260986
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,10240,1,1,128,1,fp8,fp8,0,0.14232800006866456
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,float16,0,0.10471520423889161
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,fp8,0,0.10670239925384521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,8192,1,1,128,1,fp8,fp8,0,0.10673919916152955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,float16,0,0.12148480415344239
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,1,1,128,1,fp8,fp8,0,0.15299199819564818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,float16,0,0.0885807991027832
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,fp8,0,0.10447360277175903
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,6144,1,1,128,1,fp8,fp8,0,0.10285919904708862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,float16,0,0.08413119912147522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,fp8,0,0.08834559917449951
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,6144,1,1,128,1,fp8,fp8,0,0.0903663992881775
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,float16,0,0.08214719891548157
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,1,1,128,1,fp8,fp8,0,0.08228960037231445
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,float16,0,0.11432000398635864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,fp8,0,0.15620479583740235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,4096,1,1,128,1,fp8,fp8,0,0.15638879537582398
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,float16,0,0.06985599994659424
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,fp8,0,0.09048640131950378
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,4096,1,1,128,1,fp8,fp8,0,0.09226080179214477
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,float16,0,0.06182079911231995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,fp8,0,0.07241759896278381
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,4096,1,1,128,1,fp8,fp8,0,0.07193760275840759
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,float16,0,0.05820159912109375
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,fp8,0,0.1525760054588318
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,float16,0,0.057596802711486816
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,1,1,128,1,fp8,fp8,0,0.06201760172843933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,fp8,0,0.059552001953125
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,4096,1,1,128,1,fp8,fp8,0,0.05802400112152099
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,float16,0,0.07637760043144226
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,1,1,128,1,fp8,fp8,0,0.11005760431289673
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,fp8,0,0.11090879440307617
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,float16,0,0.05355520248413086
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,fp8,0,0.08228480219841003
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,fp8,0,0.07019519805908203
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,float16,0,0.04935680031776428
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,3072,1,1,128,1,fp8,fp8,0,0.06987360119819641
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,1,1,128,1,fp8,fp8,0,0.05753759741783142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,float16,0,0.047414401173591615
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,fp8,0,0.050121599435806276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,3072,1,1,128,1,fp8,fp8,0,0.04950079917907715
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,float16,0,0.04701279997825623
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,fp8,0,0.04729759991168976
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,3072,1,1,128,1,fp8,fp8,0,0.047305598855018616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,float16,0,0.07544479966163635
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,fp8,0,0.12108479738235474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,2048,1,1,128,1,fp8,fp8,0,0.1212272047996521
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,float16,0,0.04454880058765411
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,fp8,0,0.06963359713554382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,2048,1,1,128,1,fp8,fp8,0,0.06784639954566955
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,fp8,0,0.0514240026473999
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,1,1,128,1,fp8,fp8,0,0.05148959755897522
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,float16,0,0.03699679970741272
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,fp8,0,0.06375200152397156
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,fp8,0,0.04135999977588654
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,2048,1,1,128,1,fp8,fp8,0,0.04119519889354706
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,float16,0,0.03511680066585541
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,fp8,0,0.03707840144634247
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,2048,1,1,128,1,fp8,fp8,0,0.03711360096931458
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,float16,0,0.035017600655555724
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,1,1,128,1,fp8,fp8,0,0.035043200850486754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,float16,0,0.05270400047302246
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,fp8,0,0.08828799724578858
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,fp8,0,0.05772479772567749
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1536,1,1,128,1,fp8,fp8,0,0.08840960264205933
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,float16,0,0.03518880009651184
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,fp8,0,0.05363199710845947
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1536,1,1,128,1,fp8,fp8,0,0.054054397344589236
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,float16,0,0.03107520043849945
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,fp8,0,0.04058080017566681
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1536,1,1,128,1,fp8,fp8,0,0.03924799859523773
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,float16,0,0.02897599935531616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,fp8,0,0.033004799485206605
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1536,1,1,128,1,fp8,fp8,0,0.033127999305725096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,float16,0,0.038043200969696045
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,fp8,0,0.03113119900226593
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1536,1,1,128,1,fp8,fp8,0,0.03069919943809509
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,float16,0,0.027820798754692077
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,fp8,0,0.028964799642562867
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1536,1,1,128,1,fp8,fp8,0,0.028944000601768494
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,float16,0,0.055553597211837766
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,fp8,0,0.10465919971466064
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1024,1,1,128,1,fp8,fp8,0,0.10389920473098754
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,fp8,0,0.05759360194206238
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,fp8,0,0.03506399989128113
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,1,1,128,1,fp8,fp8,0,0.05764639973640442
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,float16,0,0.026956799626350402
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,fp8,0,0.03921439945697784
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1024,1,1,128,1,fp8,fp8,0,0.039233601093292235
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,float16,0,0.02486719936132431
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,float16,0,0.03916000127792359
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,1,1,128,1,fp8,fp8,0,0.030979201197624207
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,float16,0,0.02295520007610321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,fp8,0,0.026203200221061707
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1024,1,1,128,1,fp8,fp8,0,0.02573919892311096
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,float16,0,0.022732800245285033
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,float16,0,0.03287999927997589
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,1,1,128,1,fp8,fp8,0,0.02473440021276474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,float16,0,0.022804799675941467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,fp8,0,0.022868800163269042
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,fp8,0,0.030935999751091004
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1024,1,1,128,1,fp8,fp8,0,0.022840000689029694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,1,1,128,1,float16,float16,0,0.04552960097789764
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,1,1,128,1,float16,fp8,0,0.09467679858207703
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,1,1,128,1,float16,float16,0,0.027011200785636902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,512,1,1,128,1,fp8,fp8,0,0.0947376012802124
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,1,1,128,1,float16,fp8,0,0.051528000831604005
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,512,1,1,128,1,fp8,fp8,0,0.05203840136528015
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,1,1,128,1,float16,float16,0,0.020735999941825865
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,1,1,128,1,float16,fp8,0,0.03340800106525421
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,512,1,1,128,1,fp8,fp8,0,0.033078399300575254
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,1,1,128,1,float16,float16,0,0.01899999976158142
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,1,1,128,1,fp8,fp8,0,0.02526240050792694
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,1,1,128,1,float16,float16,0,0.01714559942483902
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,1,1,128,1,float16,fp8,0,0.021107199788093566
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,fp8,0,0.0229312002658844
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,1,1,128,1,float16,float16,0,0.01671359986066818
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,512,1,1,128,1,fp8,fp8,0,0.02101760059595108
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,1,1,128,1,float16,fp8,0,0.018566399812698364
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,512,1,1,128,1,fp8,fp8,0,0.01887039989233017
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,1,1,128,1,float16,fp8,0,0.017110399901866913
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,1,1,128,1,float16,float16,0,0.01682240068912506
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,1,1,128,1,fp8,fp8,0,0.017103999853134155
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,1,1,128,1,float16,fp8,0,0.01681919991970062
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,512,1,1,128,1,fp8,fp8,0,0.016923199594020843
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,1,1,128,1,float16,float16,0,0.022814400494098663
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,1,1,128,1,fp8,fp8,0,0.04938719868659973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,256,1,1,128,1,float16,fp8,0,0.04970720112323761
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,1,1,128,1,float16,float16,0,0.018723200261592864
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,1,1,128,1,fp8,fp8,0,0.031118398904800414
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,1,1,128,1,float16,float16,0,0.016542400419712066
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,1,1,128,1,float16,fp8,0,0.022643199563026427
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,256,1,1,128,1,fp8,fp8,0,0.022700800001621245
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,512,1,1,128,1,float16,fp8,0,0.024806399643421174
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,1,1,128,1,float16,float16,0,0.014563199877738953
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,1,1,128,1,float16,fp8,0,0.01690080016851425
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,256,1,1,128,1,fp8,fp8,0,0.018564799427986146
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,1,1,128,1,float16,float16,0,0.014524799585342408
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,1,1,128,1,float16,fp8,0,0.015038399398326874
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,256,1,1,128,1,fp8,fp8,0,0.014574399590492249
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,1,1,128,1,float16,fp8,0,0.014595200121402741
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,1,1,128,1,fp8,fp8,0,0.014588800072669984
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,1,1,128,1,float16,float16,0,0.012763200700283051
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,1,1,128,1,float16,fp8,0,0.014569599926471711
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,256,1,1,128,1,fp8,fp8,0,0.014614400267601014
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,1,1,128,1,float16,float16,0,0.01456640064716339
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,512,1,1,128,1,float16,float16,0,0.01669120043516159
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,1,1,128,1,fp8,fp8,0,0.014398400485515595
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,1,1,128,1,float16,float16,0,0.016622400283813475
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,1,1,128,1,float16,fp8,0,0.02900480031967163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,128,1,1,128,1,fp8,fp8,0,0.02900800108909607
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,1,1,128,1,float16,float16,0,0.014497600495815277
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,1,1,128,1,float16,fp8,0,0.020644800364971162
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,256,1,1,128,1,float16,fp8,0,0.031200000643730165
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,1,1,128,1,float16,float16,0,0.012542399764060973
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,1,1,128,1,float16,fp8,0,0.014635199308395385
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,128,1,1,128,1,fp8,fp8,0,0.014660799503326416
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,1,1,128,1,float16,float16,0,0.010526400059461594
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,1,1,128,1,float16,fp8,0,0.01276639997959137
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,256,1,1,128,1,float16,float16,0,0.01449279934167862
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,128,1,1,128,1,fp8,fp8,0,0.01260959953069687
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,1,1,128,1,float16,float16,0,0.010513599961996078
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,1,1,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,1,1,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,1,1,128,1,float16,fp8,0,0.010564800351858139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,128,1,1,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,1,1,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,1,1,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,256,1,1,128,1,float16,fp8,0,0.014606399834156037
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,128,1,1,128,1,fp8,fp8,0,0.010550399869680404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,1,1,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,1,1,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,128,1,1,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,1,1,128,1,float16,float16,0,0.013673600554466248
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,1,1,128,1,float16,fp8,0,0.018760000169277192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,64,1,1,128,1,fp8,fp8,0,0.02048799991607666
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,1,1,128,1,float16,float16,0,0.012531200051307678
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,1,1,128,1,float16,fp8,0,0.014657600224018097
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,128,1,1,128,1,fp8,fp8,0,0.020576000213623047
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,64,1,1,128,1,fp8,fp8,0,0.014732800424098969
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,1,1,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,1,1,128,1,float16,fp8,0,0.012587200105190276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,64,1,1,128,1,fp8,fp8,0,0.012558400630950928
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,1,1,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,1,1,128,1,fp8,fp8,0,0.011139199882745743
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,1,1,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,1,1,128,1,float16,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,64,1,1,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,1,1,128,1,float16,float16,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,1,1,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,64,1,1,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,1,1,128,1,float16,float16,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,1,1,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,128,1,1,128,1,fp8,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,64,1,1,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,1,1,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,1,1,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,1,1,128,1,float16,float16,0,0.01249919980764389
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,1,1,128,1,float16,fp8,0,0.014633600413799287
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,32,1,1,128,1,fp8,fp8,0,0.014708800613880158
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,1,1,128,1,float16,float16,0,0.0106175996363163
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,1,1,128,1,float16,fp8,0,0.013148799538612366
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,32,1,1,128,1,fp8,fp8,0,0.012545600533485413
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,1,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,1,1,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,64,1,1,128,1,float16,fp8,0,0.010769599676132202
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,1,1,128,1,float16,float16,0,0.010625600069761276
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,1,1,128,1,float16,fp8,0,0.010620799660682679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,32,1,1,128,1,fp8,fp8,0,0.010639999806880952
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,1,1,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,1,1,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,32,1,1,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,1,1,128,1,float16,float16,0,0.01061279997229576
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,1,1,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,32,1,1,128,1,fp8,fp8,0,0.01067200005054474
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,64,1,1,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,1,1,128,1,float16,fp8,0,0.010585600137710571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,1,1,128,1,fp8,fp8,0,0.010609599947929382
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,1,1,128,1,float16,float16,0,0.010564800351858139
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,1,1,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,32,1,1,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,1,1,128,1,float16,fp8,0,0.012585599720478059
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,1,1,128,1,float16,float16,0,0.011088000237941742
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,16,1,1,128,1,fp8,fp8,0,0.012664000689983367
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,1,1,128,1,float16,float16,0,0.010943999886512757
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,1,1,128,1,float16,fp8,0,0.010662399977445603
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,1,1,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,1,1,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,16,1,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,1,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,1,1,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,16,1,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,1,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,1,1,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,16,1,1,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,1,1,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,1,1,128,1,float16,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,16,1,1,128,1,fp8,fp8,0,0.010585600137710571
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,1,1,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,1,1,128,1,float16,fp8,0,0.010619200021028518
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,16,1,1,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,1,1,128,1,float16,float16,0,0.01053439974784851
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,1,1,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,32,1,1,128,1,fp8,fp8,0,0.01058880016207695
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,16,1,1,128,1,fp8,fp8,0,0.010796800255775452
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,0,0.010620799660682679
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,1,1,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,64,1,1,1,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,0,0.010503999888896942
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,32,1,1,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,32,1,1,1,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,1,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,8,1,1,1,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,0,0.011151999980211259
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,0,0.010635200142860412
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,4,1,1,1,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,16,1,1,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,0,0.010571199655532836
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,1,1,128,1,fp8,fp8,0,0.008550400286912918
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,0,0.008529599756002426
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,1,1,1,1,128,1,fp8,fp8,0,0.01055999994277954
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA GB200,context_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,0,0.010558400303125381
