framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,2,128,1,float16,fp8,0,72.00833129882812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,2,128,1,fp8,fp8,0,72.418896484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,1,128,1,float16,float16,0,91.00406494140626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,1,128,1,float16,fp8,0,71.3848876953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,1,128,1,fp8,fp8,0,72.508154296875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,4,128,1,float16,fp8,0,71.39459838867188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,4,128,1,fp8,fp8,0,72.3302734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,8,128,1,float16,fp8,0,72.58831787109375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,2,128,1,float16,float16,0,92.50703735351563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,4,128,1,float16,float16,0,93.16741943359375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,8,128,1,fp8,fp8,0,71.77540893554688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,1,128,1,float16,fp8,0,36.611453247070315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,1,128,1,fp8,fp8,0,36.56351318359375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,1,128,1,float16,float16,0,45.57297973632812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,2,128,1,float16,fp8,0,36.716131591796874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,2,128,1,fp8,fp8,0,36.64048767089844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,2,128,1,float16,float16,0,44.98223876953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,4,128,1,float16,float16,0,46.09608764648438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,4,128,1,float16,fp8,0,36.743466186523435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,4,128,1,fp8,fp8,0,36.488711547851565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,8,128,1,float16,fp8,0,36.30575866699219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,8,128,1,float16,float16,0,46.047393798828125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,8,128,1,fp8,fp8,0,36.9028564453125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,96,128,1,float16,float16,0,23.821754455566406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,1,128,1,float16,float16,0,23.091706848144533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,96,8,128,1,float16,float16,0,93.24923095703124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,96,128,1,float16,fp8,0,18.273471069335937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,96,128,1,fp8,fp8,0,18.219100952148438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,1,128,1,float16,fp8,0,18.45635223388672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,96,128,1,float16,float16,0,48.466024780273436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,1,128,1,fp8,fp8,0,18.25584259033203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,96,128,1,float16,fp8,0,36.995733642578124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,2,128,1,float16,fp8,0,18.186929321289064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,96,96,128,1,fp8,fp8,0,37.31975402832031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,2,128,1,fp8,fp8,0,18.013595581054688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,2,128,1,float16,float16,0,22.857911682128908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,4,128,1,float16,float16,0,23.106903076171875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,4,128,1,float16,fp8,0,17.992514038085936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,4,128,1,fp8,fp8,0,18.54369659423828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,96,128,1,float16,fp8,0,9.235814666748047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,96,128,1,float16,float16,0,11.947408294677734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,8,128,1,float16,fp8,0,17.960116577148437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,8,128,1,fp8,fp8,0,18.01305236816406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,96,128,1,fp8,fp8,0,9.354499053955077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,1,128,1,float16,fp8,0,9.07703857421875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,96,8,128,1,float16,float16,0,22.519755554199218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,1,128,1,float16,float16,0,11.702683258056641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,1,128,1,fp8,fp8,0,9.022727966308594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,2,128,1,float16,float16,0,11.179312133789063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,2,128,1,float16,fp8,0,8.860982513427734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,2,128,1,fp8,fp8,0,8.821665954589843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,4,128,1,float16,fp8,0,9.241083526611328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,4,128,1,fp8,fp8,0,8.814115142822265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,4,128,1,float16,float16,0,11.56665267944336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,8,128,1,float16,fp8,0,8.919430541992188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,8,128,1,float16,float16,0,12.016878509521485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,96,8,128,1,fp8,fp8,0,8.871585845947266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,1,128,1,float16,fp8,0,40.91051330566406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,1,128,1,fp8,fp8,0,41.985467529296876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,2,128,1,float16,fp8,0,41.75984191894531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,2,128,1,fp8,fp8,0,41.333917236328126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,4,128,1,float16,fp8,0,41.12156982421875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,1,128,1,float16,float16,0,52.056005859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,2,128,1,float16,float16,0,51.353692626953126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,4,128,1,float16,float16,0,53.08988647460937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,96,128,1,float16,fp8,0,21.282334899902345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,96,128,1,float16,float16,0,27.362548828125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,96,128,1,fp8,fp8,0,21.348843383789063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,4,128,1,fp8,fp8,0,41.134918212890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,1,128,1,float16,float16,0,26.47907409667969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,8,128,1,float16,fp8,0,41.89718322753906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,8,128,1,fp8,fp8,0,41.55546569824219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,1,128,1,float16,fp8,0,20.81416015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,1,128,1,fp8,fp8,0,20.174215698242186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,96,8,128,1,float16,float16,0,53.4830078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,2,128,1,float16,float16,0,26.00355224609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,2,128,1,float16,fp8,0,20.624615478515626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,2,128,1,fp8,fp8,0,20.643646240234375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,4,128,1,float16,fp8,0,20.819577026367188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,4,128,1,float16,float16,0,26.47196044921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,4,128,1,fp8,fp8,0,20.384483337402344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,96,128,1,float16,float16,0,13.853172302246094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,96,128,1,float16,fp8,0,10.913607788085937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,8,128,1,float16,fp8,0,20.77330627441406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,96,128,1,fp8,fp8,0,10.803262329101562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,8,128,1,float16,float16,0,26.45137939453125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,96,8,128,1,fp8,fp8,0,20.265550231933595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,1,128,1,float16,fp8,0,10.33126220703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,1,128,1,float16,float16,0,13.051478576660156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,1,128,1,fp8,fp8,0,10.131166076660156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,2,128,1,float16,fp8,0,10.54626235961914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,2,128,1,fp8,fp8,0,10.3498046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,2,128,1,float16,float16,0,13.493209838867188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,4,128,1,float16,fp8,0,10.390067291259765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,4,128,1,fp8,fp8,0,10.467625427246094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,4,128,1,float16,float16,0,12.711998748779298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,96,128,1,float16,float16,0,7.048118591308594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,96,128,1,float16,fp8,0,5.3949951171875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,8,128,1,float16,fp8,0,10.26263656616211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,96,128,1,fp8,fp8,0,5.442273712158203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,8,128,1,float16,float16,0,13.2983154296875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,96,8,128,1,fp8,fp8,0,10.87774887084961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,1,128,1,float16,fp8,0,5.049345779418945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,1,128,1,float16,float16,0,6.492104339599609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,1,128,1,fp8,fp8,0,5.285926437377929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,2,128,1,float16,fp8,0,5.052231979370117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,2,128,1,float16,float16,0,6.530833435058594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,2,128,1,fp8,fp8,0,5.169003295898437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,4,128,1,float16,fp8,0,5.056593704223633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,4,128,1,fp8,fp8,0,5.095657730102539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,4,128,1,float16,float16,0,6.676195526123047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,8,128,1,float16,fp8,0,5.112265777587891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,8,128,1,fp8,fp8,0,5.2132926940917965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,96,8,128,1,float16,float16,0,6.650717163085938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,1,128,1,float16,fp8,0,28.745034790039064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,1,128,1,fp8,fp8,0,28.850006103515625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,2,128,1,float16,fp8,0,28.7792236328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,2,128,1,fp8,fp8,0,29.424520874023436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,4,128,1,float16,fp8,0,28.123623657226563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,1,128,1,float16,float16,0,36.939840698242186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,2,128,1,float16,float16,0,37.36200866699219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,4,128,1,float16,float16,0,37.05482482910156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,96,128,1,float16,fp8,0,15.150904846191406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,96,128,1,float16,float16,0,20.1400146484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,96,128,1,fp8,fp8,0,15.254690551757813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,4,128,1,fp8,fp8,0,28.906814575195312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,1,128,1,float16,float16,0,18.315353393554688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,8,128,1,float16,fp8,0,29.153518676757812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,8,128,1,fp8,fp8,0,29.50472412109375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,96,8,128,1,float16,float16,0,36.978573608398435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,1,128,1,float16,fp8,0,14.573529052734376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,1,128,1,fp8,fp8,0,14.038211059570312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,2,128,1,float16,fp8,0,14.381539916992187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,2,128,1,float16,float16,0,18.89202117919922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,2,128,1,fp8,fp8,0,14.43115234375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,4,128,1,float16,fp8,0,14.744595336914063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,4,128,1,float16,float16,0,18.393727111816407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,96,128,1,float16,fp8,0,7.633363342285156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,4,128,1,fp8,fp8,0,14.271180725097656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,8,128,1,float16,fp8,0,14.376882934570313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,96,128,1,float16,float16,0,10.040227508544922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,96,128,1,fp8,fp8,0,7.640184020996093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,8,128,1,float16,float16,0,18.787364196777343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,96,8,128,1,fp8,fp8,0,14.534243774414062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,1,128,1,float16,fp8,0,7.154732513427734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,1,128,1,float16,float16,0,9.054062652587891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,1,128,1,fp8,fp8,0,7.109476470947266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,2,128,1,float16,fp8,0,7.233897399902344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,2,128,1,fp8,fp8,0,7.327188873291016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,2,128,1,float16,float16,0,9.205721282958985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,4,128,1,float16,float16,0,9.12640609741211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,4,128,1,float16,fp8,0,7.255830383300781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,4,128,1,fp8,fp8,0,7.336669158935547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,8,128,1,float16,fp8,0,7.24932632446289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,96,128,1,float16,fp8,0,3.7757633209228514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,96,128,1,float16,float16,0,5.085575866699219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,8,128,1,float16,float16,0,9.124976348876952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,96,8,128,1,fp8,fp8,0,7.273808288574219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,96,128,1,fp8,fp8,0,3.8376895904541017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,1,128,1,float16,float16,0,4.4987743377685545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,1,128,1,float16,fp8,0,3.5539295196533205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,1,128,1,fp8,fp8,0,3.5100849151611326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,2,128,1,float16,float16,0,4.42461929321289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,2,128,1,float16,fp8,0,3.6924625396728517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,2,128,1,fp8,fp8,0,3.648873519897461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,4,128,1,float16,fp8,0,3.5618080139160155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,4,128,1,fp8,fp8,0,3.627167892456055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,4,128,1,float16,float16,0,4.640547180175782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,8,128,1,float16,fp8,0,3.6353809356689455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,8,128,1,float16,float16,0,4.6048942565917965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,96,8,128,1,fp8,fp8,0,3.595627212524414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,1,128,1,float16,fp8,0,37.344747924804686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,1,128,1,fp8,fp8,0,37.98133850097656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,2,128,1,fp8,fp8,0,37.65584716796875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,2,128,1,float16,fp8,0,38.4680908203125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,4,128,1,float16,fp8,0,37.3895751953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,1,128,1,float16,float16,0,48.07390441894531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,2,128,1,float16,float16,0,47.10554809570313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,4,128,1,float16,float16,0,48.170474243164065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,96,128,1,float16,float16,0,25.119602966308594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,96,128,1,float16,fp8,0,19.975193786621094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,96,128,1,fp8,fp8,0,19.828825378417967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,1,128,1,float16,float16,0,23.86994171142578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,4,128,1,fp8,fp8,0,37.604095458984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,8,128,1,float16,fp8,0,37.765542602539064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,8,128,1,fp8,fp8,0,38.62080383300781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,1,128,1,float16,fp8,0,18.32811737060547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,96,8,128,1,float16,float16,0,48.3916748046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,1,128,1,fp8,fp8,0,19.07882080078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,2,128,1,float16,float16,0,23.709828186035157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,2,128,1,float16,fp8,0,18.788499450683595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,2,128,1,fp8,fp8,0,18.62067565917969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,4,128,1,float16,fp8,0,19.284394836425783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,4,128,1,float16,float16,0,23.842008972167967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,4,128,1,fp8,fp8,0,18.81226043701172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,96,128,1,float16,fp8,0,9.997163391113281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,96,128,1,float16,float16,0,13.004974365234375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,8,128,1,float16,fp8,0,19.080424499511718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,96,128,1,fp8,fp8,0,10.112413024902343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,8,128,1,fp8,fp8,0,18.535130310058594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,96,8,128,1,float16,float16,0,24.195249938964842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,1,128,1,float16,fp8,0,9.18692626953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,1,128,1,float16,float16,0,11.865625762939453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,1,128,1,fp8,fp8,0,9.484473419189452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,2,128,1,float16,fp8,0,9.501217651367188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,2,128,1,float16,float16,0,12.027476501464843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,2,128,1,fp8,fp8,0,9.60931396484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,4,128,1,float16,fp8,0,9.216387176513672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,4,128,1,fp8,fp8,0,9.270362854003906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,4,128,1,float16,float16,0,12.136949157714843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,96,128,1,float16,float16,0,6.60626220703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,96,128,1,float16,fp8,0,4.825268936157227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,8,128,1,float16,float16,0,12.04628677368164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,8,128,1,float16,fp8,0,9.682828521728515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,96,128,1,fp8,fp8,0,4.8331855773925785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,96,8,128,1,fp8,fp8,0,9.364695739746093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,1,128,1,float16,fp8,0,4.703900909423828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,1,128,1,float16,float16,0,5.935870361328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,1,128,1,fp8,fp8,0,4.731726455688476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,2,128,1,fp8,fp8,0,4.6536815643310545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,2,128,1,float16,fp8,0,4.673494338989258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,2,128,1,float16,float16,0,5.709598541259766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,4,128,1,float16,fp8,0,4.641030502319336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,4,128,1,fp8,fp8,0,4.665596771240234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,4,128,1,float16,float16,0,5.897030258178711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,96,128,1,float16,fp8,0,2.4914464950561523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,96,128,1,float16,float16,0,2.9948896408081054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,8,128,1,float16,float16,0,5.9703937530517575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,8,128,1,float16,fp8,0,4.7659648895263675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,96,128,1,fp8,fp8,0,2.4547296524047852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,96,8,128,1,fp8,fp8,0,4.673028945922852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,1,128,1,float16,fp8,0,2.3906015396118163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,1,128,1,float16,float16,0,2.801815986633301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,1,128,1,fp8,fp8,0,2.273302459716797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,2,128,1,float16,fp8,0,2.435456085205078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,2,128,1,float16,float16,0,2.8837295532226563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,2,128,1,fp8,fp8,0,2.3462192535400392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,4,128,1,float16,float16,0,2.6652847290039063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,4,128,1,float16,fp8,0,2.3168127059936525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,4,128,1,fp8,fp8,0,2.314179229736328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,8,128,1,float16,fp8,0,2.2729488372802735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,8,128,1,float16,float16,0,2.988598442077637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,96,8,128,1,fp8,fp8,0,2.308399963378906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,1,128,1,float16,fp8,0,22.009306335449217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,2,128,1,float16,fp8,0,21.31634063720703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,1,128,1,fp8,fp8,0,21.989382934570312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,2,128,1,fp8,fp8,0,21.778646850585936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,4,128,1,float16,fp8,0,21.734686279296874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,1,128,1,float16,float16,0,27.485043334960938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,2,128,1,float16,float16,0,28.026104736328126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,4,128,1,float16,float16,0,28.06611022949219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,96,128,1,float16,fp8,0,11.414246368408204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,96,128,1,float16,float16,0,15.251481628417968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,96,128,1,fp8,fp8,0,11.650103759765624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,1,128,1,float16,float16,0,13.573388671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,4,128,1,fp8,fp8,0,22.17861328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,8,128,1,float16,fp8,0,21.93469696044922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,8,128,1,fp8,fp8,0,21.650245666503906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,96,8,128,1,float16,float16,0,27.679141235351562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,1,128,1,float16,fp8,0,10.690636444091798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,1,128,1,fp8,fp8,0,10.618016052246094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,2,128,1,float16,fp8,0,10.733865356445312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,2,128,1,float16,float16,0,14.0564697265625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,2,128,1,fp8,fp8,0,11.007030487060547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,4,128,1,float16,fp8,0,10.924523162841798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,4,128,1,float16,float16,0,14.117459106445313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,4,128,1,fp8,fp8,0,10.555928039550782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,8,128,1,float16,fp8,0,10.790113830566407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,96,128,1,float16,fp8,0,5.9708911895751955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,96,128,1,fp8,fp8,0,5.744595336914062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,96,128,1,float16,float16,0,7.673060607910156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,8,128,1,float16,float16,0,14.285920715332031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,96,8,128,1,fp8,fp8,0,11.167259216308594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,1,128,1,float16,float16,0,6.785391998291016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,1,128,1,float16,fp8,0,5.244968032836914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,1,128,1,fp8,fp8,0,5.283211135864258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,2,128,1,float16,fp8,0,5.302268981933594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,2,128,1,fp8,fp8,0,5.465385437011719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,2,128,1,float16,float16,0,7.039697265625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,4,128,1,float16,float16,0,6.7201377868652346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,4,128,1,float16,fp8,0,5.380923080444336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,4,128,1,fp8,fp8,0,5.391505432128906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,96,128,1,float16,float16,0,3.6792545318603516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,96,128,1,float16,fp8,0,2.950164794921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,8,128,1,float16,fp8,0,5.450559997558594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,8,128,1,float16,float16,0,6.954792022705078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,96,8,128,1,fp8,fp8,0,5.371793746948242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,96,128,1,fp8,fp8,0,2.921504020690918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,1,128,1,float16,fp8,0,2.651412773132324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,1,128,1,float16,float16,0,3.241582489013672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,1,128,1,fp8,fp8,0,2.7641904830932615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,2,128,1,float16,float16,0,3.341182327270508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,2,128,1,fp8,fp8,0,2.879204750061035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,4,128,1,float16,float16,0,3.2770751953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,4,128,1,float16,fp8,0,2.6511808395385743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,4,128,1,fp8,fp8,0,2.651131248474121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,96,128,1,float16,float16,0,1.69354248046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,8,128,1,float16,fp8,0,2.6952272415161134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,8,128,1,float16,float16,0,3.4681327819824217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,96,128,1,float16,fp8,0,1.5699888229370118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,96,128,1,fp8,fp8,0,1.4151056289672852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,8,128,1,fp8,fp8,0,2.811625671386719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,96,2,128,1,float16,fp8,0,2.7507951736450194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,1,128,1,float16,float16,0,1.552899169921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,1,128,1,fp8,fp8,0,1.3344479560852052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,1,128,1,float16,fp8,0,1.4552207946777345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,2,128,1,float16,fp8,0,1.3190848350524902
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,2,128,1,float16,float16,0,1.7815168380737305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,2,128,1,fp8,fp8,0,1.3440256118774414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,4,128,1,float16,fp8,0,1.4474032402038575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,4,128,1,float16,float16,0,1.5354592323303222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,4,128,1,fp8,fp8,0,1.315180778503418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,8,128,1,float16,float16,0,1.5415360450744628
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,8,128,1,float16,fp8,0,1.3160592079162599
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,96,8,128,1,fp8,fp8,0,1.5992799758911134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,1,128,1,fp8,fp8,0,19.814190673828126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,1,128,1,float16,fp8,0,20.29126434326172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,2,128,1,float16,fp8,0,19.821559143066406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,2,128,1,fp8,fp8,0,20.244100952148436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,1,128,1,float16,float16,0,25.690762329101563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,4,128,1,float16,fp8,0,19.77012939453125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,2,128,1,float16,float16,0,26.108013916015626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,4,128,1,float16,float16,0,25.7723388671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,96,128,1,float16,fp8,0,10.900633239746094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,1,128,1,float16,float16,0,12.93739013671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,96,128,1,float16,float16,0,14.672538757324219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,4,128,1,fp8,fp8,0,20.66088104248047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,8,128,1,float16,fp8,0,20.369747924804688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,8,128,1,fp8,fp8,0,20.381292724609374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,96,128,1,fp8,fp8,0,11.224276733398437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,96,8,128,1,float16,float16,0,25.904989624023436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,1,128,1,float16,fp8,0,10.04787826538086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,1,128,1,fp8,fp8,0,9.996832275390625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,2,128,1,float16,fp8,0,9.92418212890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,2,128,1,fp8,fp8,0,9.84952621459961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,4,128,1,float16,fp8,0,10.102033233642578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,2,128,1,float16,float16,0,12.8296142578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,4,128,1,float16,float16,0,12.78417739868164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,4,128,1,fp8,fp8,0,10.024988555908203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,96,128,1,float16,fp8,0,5.523436737060547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,96,128,1,float16,float16,0,7.096984100341797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,96,128,1,fp8,fp8,0,5.6042430877685545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,8,128,1,float16,fp8,0,10.179561614990234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,1,128,1,float16,float16,0,6.27645263671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,8,128,1,float16,float16,0,12.53700942993164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,96,8,128,1,fp8,fp8,0,9.985177612304687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,1,128,1,float16,fp8,0,4.966366577148437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,1,128,1,fp8,fp8,0,5.002825546264648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,2,128,1,float16,fp8,0,5.002561569213867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,2,128,1,float16,float16,0,6.117238235473633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,4,128,1,float16,fp8,0,4.984548950195313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,4,128,1,float16,float16,0,6.374769592285157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,8,128,1,float16,fp8,0,5.021089553833008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,8,128,1,float16,float16,0,6.538483428955078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,96,128,1,float16,float16,0,3.492532730102539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,2,128,1,fp8,fp8,0,4.967454528808593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,96,128,1,float16,fp8,0,2.7022815704345704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,8,128,1,fp8,fp8,0,5.115563201904297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,96,4,128,1,fp8,fp8,0,4.949332809448242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,96,128,1,fp8,fp8,0,2.8153568267822267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,1,128,1,float16,fp8,0,2.5892192840576174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,1,128,1,float16,float16,0,3.0021999359130858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,2,128,1,float16,fp8,0,2.4696975708007813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,1,128,1,fp8,fp8,0,2.575760078430176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,2,128,1,fp8,fp8,0,2.479080009460449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,2,128,1,float16,float16,0,2.965113639831543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,4,128,1,float16,fp8,0,2.4942176818847654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,4,128,1,float16,float16,0,2.9559568405151366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,96,128,1,float16,fp8,0,1.465294361114502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,96,128,1,float16,float16,0,1.6864559173583984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,4,128,1,fp8,fp8,0,2.4850351333618166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,96,128,1,fp8,fp8,0,1.3591872215270997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,8,128,1,float16,fp8,0,2.496963119506836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,8,128,1,fp8,fp8,0,2.472687911987305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,1,128,1,float16,float16,0,1.5513968467712402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,96,8,128,1,float16,float16,0,3.2085502624511717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,1,128,1,float16,fp8,0,1.2527615547180175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,1,128,1,fp8,fp8,0,1.4326751708984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,2,128,1,float16,float16,0,1.4508624076843262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,2,128,1,float16,fp8,0,1.232256031036377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,2,128,1,fp8,fp8,0,1.248960018157959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,4,128,1,float16,float16,0,1.43536319732666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,4,128,1,float16,fp8,0,1.3193903923034669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,4,128,1,fp8,fp8,0,1.5770048141479491
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,96,128,1,float16,float16,0,0.8027600288391114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,96,128,1,float16,fp8,0,0.6896543979644776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,8,128,1,float16,float16,0,1.4413007736206054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,8,128,1,float16,fp8,0,1.2897583961486816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,96,8,128,1,fp8,fp8,0,1.2906175613403321
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,1,128,1,fp8,fp8,0,0.6185328006744385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,2,128,1,float16,float16,0,0.7102287769317627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,2,128,1,fp8,fp8,0,0.6216351985931396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,4,128,1,float16,float16,0,0.7222784042358399
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,4,128,1,float16,fp8,0,0.621131181716919
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,4,128,1,fp8,fp8,0,0.6220719814300537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,8,128,1,float16,float16,0,0.7206480026245117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,8,128,1,float16,fp8,0,0.6511040210723877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,8,128,1,fp8,fp8,0,0.6221712112426758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,1,128,1,float16,float16,0,0.7202015876770019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,1,128,1,float16,fp8,0,0.7456463813781739
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,2,128,1,float16,fp8,0,0.6895648002624511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,96,96,128,1,fp8,fp8,0,0.7165952205657959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,1,128,1,float16,fp8,0,11.937782287597656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,2,128,1,float16,fp8,0,11.655073547363282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,1,128,1,float16,float16,0,14.584230041503906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,2,128,1,fp8,fp8,0,11.812452697753907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,2,128,1,float16,float16,0,14.751039123535156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,1,128,1,fp8,fp8,0,11.797342681884766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,4,128,1,float16,float16,0,14.645744323730469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,96,128,1,float16,fp8,0,6.59435806274414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,96,128,1,float16,float16,0,8.768994903564453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,4,128,1,float16,fp8,0,12.026058959960938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,4,128,1,fp8,fp8,0,11.821102142333984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,8,128,1,float16,fp8,0,11.84521942138672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,8,128,1,fp8,fp8,0,11.880903625488282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,96,128,1,fp8,fp8,0,6.55425796508789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,96,8,128,1,float16,float16,0,14.7613525390625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,1,128,1,float16,fp8,0,5.8816062927246096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,1,128,1,float16,float16,0,7.314139556884766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,1,128,1,fp8,fp8,0,5.985736083984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,2,128,1,float16,float16,0,7.275737762451172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,2,128,1,float16,fp8,0,5.928731155395508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,2,128,1,fp8,fp8,0,5.887360000610352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,4,128,1,float16,fp8,0,5.8380992889404295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,4,128,1,float16,float16,0,7.2485107421875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,4,128,1,fp8,fp8,0,5.927414321899414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,96,128,1,float16,fp8,0,3.370560073852539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,96,128,1,float16,float16,0,4.211038589477539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,8,128,1,float16,fp8,0,6.149259185791015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,8,128,1,float16,float16,0,7.428524780273437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,96,8,128,1,fp8,fp8,0,5.944152069091797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,96,128,1,fp8,fp8,0,3.5024208068847655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,1,128,1,float16,float16,0,3.5388065338134767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,1,128,1,float16,fp8,0,2.892465591430664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,1,128,1,fp8,fp8,0,2.98437442779541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,2,128,1,float16,fp8,0,2.9420543670654298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,2,128,1,float16,float16,0,3.6120113372802733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,2,128,1,fp8,fp8,0,3.0089664459228516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,4,128,1,float16,fp8,0,3.00742244720459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,4,128,1,fp8,fp8,0,3.1214208602905273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,4,128,1,float16,float16,0,3.598582458496094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,8,128,1,float16,fp8,0,2.9681615829467773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,96,128,1,float16,float16,0,2.0236047744750976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,96,128,1,float16,fp8,0,1.6274015426635742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,8,128,1,float16,float16,0,3.5812606811523438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,96,8,128,1,fp8,fp8,0,2.9773231506347657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,96,128,1,fp8,fp8,0,1.6583967208862305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,1,128,1,float16,float16,0,1.7113487243652343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,1,128,1,float16,fp8,0,1.5603919982910157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,1,128,1,fp8,fp8,0,1.479360008239746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,2,128,1,float16,fp8,0,1.4592528343200684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,2,128,1,fp8,fp8,0,1.5817808151245116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,2,128,1,float16,float16,0,1.7807935714721679
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,4,128,1,float16,fp8,0,1.4512031555175782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,4,128,1,float16,float16,0,1.7397327423095703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,4,128,1,fp8,fp8,0,1.5661919593811036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,8,128,1,float16,float16,0,1.7424608230590821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,96,128,1,float16,float16,0,0.9859168052673339
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,96,128,1,float16,fp8,0,0.8744000434875489
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,8,128,1,float16,fp8,0,1.4667471885681151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,96,128,1,fp8,fp8,0,0.8176848411560058
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,96,8,128,1,fp8,fp8,0,1.4439791679382323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,1,128,1,float16,float16,0,0.9370351791381836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,1,128,1,float16,fp8,0,0.7414559841156005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,1,128,1,fp8,fp8,0,0.8544112205505371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,2,128,1,float16,fp8,0,0.7602352142333985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,2,128,1,fp8,fp8,0,0.7360544204711914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,2,128,1,float16,float16,0,0.9188223838806152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,4,128,1,float16,float16,0,0.8468111991882324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,4,128,1,float16,fp8,0,0.7261775970458985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,4,128,1,fp8,fp8,0,0.844644832611084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,8,128,1,float16,float16,0,0.8389504432678223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,96,128,1,float16,fp8,0,0.4517183780670166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,96,128,1,fp8,fp8,0,0.4256768226623535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,1,128,1,float16,float16,0,0.4251855850219727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,1,128,1,float16,fp8,0,0.40655198097229006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,1,128,1,fp8,fp8,0,0.39640159606933595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,2,128,1,float16,float16,0,0.43916001319885256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,2,128,1,float16,fp8,0,0.37531518936157227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,2,128,1,fp8,fp8,0,0.3676608085632324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,4,128,1,float16,fp8,0,0.3974319934844971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,4,128,1,float16,float16,0,0.44337120056152346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,4,128,1,fp8,fp8,0,0.3830672025680542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,8,128,1,float16,float16,0,0.4274928092956543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,8,128,1,float16,fp8,0,0.3700176000595093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,8,128,1,fp8,fp8,0,0.38351199626922605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,96,96,128,1,float16,float16,0,0.5067791938781738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,8,128,1,fp8,fp8,0,0.7392223834991455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,96,8,128,1,float16,fp8,0,0.853217601776123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,1,128,1,fp8,fp8,0,11.237305450439454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,1,128,1,float16,fp8,0,11.377721405029297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,2,128,1,float16,fp8,0,11.293500518798828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,1,128,1,float16,float16,0,14.415359497070312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,2,128,1,fp8,fp8,0,11.437454223632812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,4,128,1,float16,fp8,0,11.47912826538086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,4,128,1,float16,float16,0,14.092181396484374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,2,128,1,float16,float16,0,14.600711059570312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,96,128,1,float16,fp8,0,6.7028350830078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,4,128,1,fp8,fp8,0,11.323350524902343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,8,128,1,float16,fp8,0,11.341732788085938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,96,128,1,fp8,fp8,0,6.621998596191406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,1,128,1,float16,float16,0,6.940643310546875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,8,128,1,fp8,fp8,0,11.791893005371094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,96,128,1,float16,float16,0,8.574961853027343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,1,128,1,float16,fp8,0,5.703524780273438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,1,128,1,fp8,fp8,0,5.763580703735352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,96,8,128,1,float16,float16,0,14.674479675292968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,2,128,1,float16,float16,0,6.950910186767578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,2,128,1,float16,fp8,0,5.710785675048828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,2,128,1,fp8,fp8,0,5.763467025756836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,4,128,1,float16,fp8,0,5.752737426757813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,4,128,1,float16,float16,0,6.979283142089844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,4,128,1,fp8,fp8,0,5.719572830200195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,96,128,1,float16,float16,0,4.306027221679687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,8,128,1,float16,float16,0,7.075734710693359
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,8,128,1,float16,fp8,0,5.988927841186523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,96,8,128,1,fp8,fp8,0,5.7518798828125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,96,128,1,fp8,fp8,0,3.3831424713134766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,96,128,1,float16,fp8,0,3.5264560699462892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,1,128,1,float16,float16,0,3.29351692199707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,1,128,1,float16,fp8,0,2.8182464599609376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,1,128,1,fp8,fp8,0,3.001006317138672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,2,128,1,fp8,fp8,0,2.824929618835449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,2,128,1,float16,fp8,0,2.879449653625488
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,2,128,1,float16,float16,0,3.413188934326172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,4,128,1,float16,fp8,0,3.0276784896850586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,4,128,1,fp8,fp8,0,2.8903663635253904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,8,128,1,float16,float16,0,3.5027088165283202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,96,128,1,float16,float16,0,1.9863136291503907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,96,128,1,float16,fp8,0,1.6992399215698242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,96,128,1,fp8,fp8,0,1.6891712188720702
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,1,128,1,float16,float16,0,1.6283599853515625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,8,128,1,float16,fp8,0,2.8983232498168947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,8,128,1,fp8,fp8,0,2.8499216079711913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,1,128,1,float16,fp8,0,1.50360164642334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,1,128,1,fp8,fp8,0,1.4746272087097168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,2,128,1,float16,float16,0,1.6423072814941406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,2,128,1,float16,fp8,0,1.430788803100586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,2,128,1,fp8,fp8,0,1.4721440315246581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,96,4,128,1,float16,float16,0,3.4798606872558593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,4,128,1,float16,fp8,0,1.431599998474121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,4,128,1,float16,float16,0,1.6636976242065429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,4,128,1,fp8,fp8,0,1.434119987487793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,96,128,1,float16,float16,0,0.989345645904541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,96,128,1,float16,fp8,0,0.8305343627929688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,96,128,1,fp8,fp8,0,0.8325776100158692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,1,128,1,float16,fp8,0,0.7119999885559082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,8,128,1,float16,fp8,0,1.485769557952881
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,1,128,1,fp8,fp8,0,0.7116064071655274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,2,128,1,float16,fp8,0,0.7164783954620362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,2,128,1,fp8,fp8,0,0.7153855800628662
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,2,128,1,float16,float16,0,0.811627197265625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,4,128,1,float16,float16,0,0.8230287551879882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,4,128,1,float16,fp8,0,0.7191279888153076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,4,128,1,fp8,fp8,0,0.718891191482544
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,8,128,1,float16,fp8,0,0.7183104038238526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,8,128,1,float16,float16,0,0.824129581451416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,8,128,1,fp8,fp8,0,0.7436304092407227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,96,128,1,float16,float16,0,0.5018112182617187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,96,128,1,fp8,fp8,0,0.4218416213989258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,1,128,1,float16,fp8,0,0.3657520055770874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,1,128,1,fp8,fp8,0,0.3613327980041504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,2,128,1,float16,float16,0,0.4135791778564453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,2,128,1,fp8,fp8,0,0.3633343935012817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,4,128,1,float16,float16,0,0.41533761024475097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,8,128,1,float16,float16,0,1.6430992126464843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,96,8,128,1,fp8,fp8,0,1.4207663536071777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,4,128,1,fp8,fp8,0,0.36916799545288087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,8,128,1,float16,fp8,0,0.36183838844299315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,8,128,1,float16,float16,0,0.417958402633667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,8,128,1,fp8,fp8,0,0.36876161098480226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,96,128,1,float16,float16,0,0.2620896100997925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,96,128,1,float16,fp8,0,0.21873760223388672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,96,128,1,fp8,fp8,0,0.21908481121063234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,1,128,1,float16,float16,0,0.21154720783233644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,1,128,1,fp8,fp8,0,0.18534239530563354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,2,128,1,float16,fp8,0,0.18539040088653563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,2,128,1,fp8,fp8,0,0.18542879819869995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,4,128,1,float16,float16,0,0.21434240341186522
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,96,128,1,float16,fp8,0,0.4230031967163086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,1,128,1,float16,float16,0,0.4119455814361572
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,4,128,1,float16,fp8,0,0.18577760457992554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,4,128,1,fp8,fp8,0,0.18688000440597535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,8,128,1,float16,float16,0,0.21657280921936034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,8,128,1,float16,fp8,0,0.18666239976882934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,2,128,1,float16,fp8,0,0.36606559753417967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,8,128,1,fp8,fp8,0,0.18715039491653443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,96,1,128,1,float16,float16,0,0.8910880088806152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,96,4,128,1,float16,fp8,0,0.3647824048995972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,1,128,1,float16,fp8,0,0.18522559404373168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,96,2,128,1,float16,float16,0,0.21230719089508057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,1,128,1,float16,fp8,0,6.953959655761719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,1,128,1,fp8,fp8,0,6.928009796142578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,2,128,1,float16,fp8,0,7.011344146728516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,2,128,1,fp8,fp8,0,6.935393524169922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,1,128,1,float16,float16,0,8.347281646728515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,2,128,1,float16,float16,0,8.496566772460938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,4,128,1,float16,fp8,0,7.008191680908203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,4,128,1,float16,float16,0,8.361062622070312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,96,128,1,float16,fp8,0,4.234454345703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,96,128,1,float16,float16,0,5.294406509399414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,96,128,1,fp8,fp8,0,4.214910507202148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,4,128,1,fp8,fp8,0,7.017212677001953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,1,128,1,float16,float16,0,4.09912338256836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,8,128,1,fp8,fp8,0,6.966044616699219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,8,128,1,float16,fp8,0,7.115032196044922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,96,8,128,1,float16,float16,0,8.643736267089844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,1,128,1,float16,fp8,0,3.5361888885498045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,1,128,1,fp8,fp8,0,3.462508773803711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,2,128,1,float16,fp8,0,3.507032012939453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,2,128,1,float16,float16,0,4.202280044555664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,2,128,1,fp8,fp8,0,3.4728462219238283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,4,128,1,float16,fp8,0,3.5684703826904296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,4,128,1,float16,float16,0,4.231444931030273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,4,128,1,fp8,fp8,0,3.502228927612305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,96,128,1,float16,float16,0,2.5287120819091795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,96,128,1,float16,fp8,0,2.1018543243408203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,8,128,1,float16,fp8,0,3.5099281311035155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,96,128,1,fp8,fp8,0,2.1745840072631837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,8,128,1,float16,float16,0,4.287627029418945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,96,8,128,1,fp8,fp8,0,3.556844711303711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,1,128,1,float16,float16,0,1.9904176712036132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,1,128,1,float16,fp8,0,1.7965263366699218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,1,128,1,fp8,fp8,0,1.74401912689209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,2,128,1,float16,fp8,0,1.7587791442871095
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,2,128,1,float16,float16,0,1.9891263961791992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,2,128,1,fp8,fp8,0,1.8254816055297851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,4,128,1,float16,float16,0,2.0251840591430663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,4,128,1,float16,fp8,0,1.807593536376953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,4,128,1,fp8,fp8,0,1.7583616256713868
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,96,128,1,float16,fp8,0,1.1061887741088867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,96,128,1,float16,float16,0,1.2750800132751465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,8,128,1,float16,fp8,0,1.755254364013672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,8,128,1,float16,float16,0,2.028116798400879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,96,8,128,1,fp8,fp8,0,1.8157407760620117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,96,128,1,fp8,fp8,0,1.0583904266357422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,1,128,1,float16,float16,0,0.9850735664367676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,1,128,1,float16,fp8,0,0.8859199523925781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,1,128,1,fp8,fp8,0,0.9154720306396484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,2,128,1,float16,float16,0,0.9977935791015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,2,128,1,float16,fp8,0,0.9160400390625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,2,128,1,fp8,fp8,0,0.8793919563293457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,4,128,1,float16,fp8,0,0.8813183784484864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,4,128,1,float16,float16,0,0.9889552116394043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,4,128,1,fp8,fp8,0,0.8817008018493653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,96,128,1,float16,fp8,0,0.5356480121612549
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,8,128,1,float16,float16,0,1.0093024253845215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,8,128,1,float16,fp8,0,0.895628833770752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,1,128,1,float16,float16,0,0.5075007915496826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,96,128,1,float16,float16,0,0.6381392002105712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,1,128,1,float16,fp8,0,0.44293761253356934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,2,128,1,float16,fp8,0,0.4443056106567383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,2,128,1,fp8,fp8,0,0.45097599029541013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,4,128,1,float16,float16,0,0.5054831981658936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,4,128,1,float16,fp8,0,0.44677119255065917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,4,128,1,fp8,fp8,0,0.44600639343261717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,8,128,1,float16,float16,0,0.5145247936248779
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,8,128,1,float16,fp8,0,0.44918241500854494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,8,128,1,fp8,fp8,0,0.44849119186401365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,96,128,1,float16,float16,0,0.32880799770355223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,96,128,1,float16,fp8,0,0.27957921028137206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,96,128,1,fp8,fp8,0,0.2753119945526123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,1,128,1,float16,float16,0,0.25451200008392333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,1,128,1,float16,fp8,0,0.22544639110565184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,1,128,1,fp8,fp8,0,0.2290560007095337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,2,128,1,float16,float16,0,0.2558144092559814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,2,128,1,float16,fp8,0,0.22715840339660645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,2,128,1,fp8,fp8,0,0.22710878849029542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,4,128,1,float16,float16,0,0.25994560718536375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,4,128,1,float16,fp8,0,0.22854719161987305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,4,128,1,fp8,fp8,0,0.2279616117477417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,8,128,1,float16,fp8,0,0.22888000011444093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,8,128,1,fp8,fp8,0,0.2293328046798706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,96,128,1,float16,float16,0,0.17078239917755128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,96,128,1,float16,fp8,0,0.14387359619140624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,96,128,1,fp8,fp8,0,0.14382079839706421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,1,128,1,float16,float16,0,0.1337183952331543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,1,128,1,fp8,fp8,0,0.11828000545501709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,2,128,1,float16,float16,0,0.13369760513305665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,96,128,1,fp8,fp8,0,0.5367008209228515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,96,8,128,1,fp8,fp8,0,0.9128352165222168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,2,128,1,float16,fp8,0,0.11797920465469361
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,2,128,1,fp8,fp8,0,0.11841599941253662
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,4,128,1,float16,float16,0,0.13521120548248292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,4,128,1,fp8,fp8,0,0.11880960464477539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,4,128,1,float16,fp8,0,0.11816799640655518
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,8,128,1,float16,fp8,0,0.11932640075683594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,1,128,1,fp8,fp8,0,0.4427807807922363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,8,128,1,fp8,fp8,0,0.11904319524765014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,96,8,128,1,float16,float16,0,0.26094880104064944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,1,128,1,float16,fp8,0,0.11785279512405396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,96,8,128,1,float16,float16,0,0.13613120317459107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,96,2,128,1,float16,float16,0,0.536195182800293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,1,128,1,float16,fp8,0,7.297854614257813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,1,128,1,fp8,fp8,0,7.319545745849609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,1,128,1,float16,float16,0,8.606435394287109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,2,128,1,float16,float16,0,8.52099838256836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,2,128,1,float16,fp8,0,7.294099426269531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,2,128,1,fp8,fp8,0,7.304841613769531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,4,128,1,float16,fp8,0,7.342948913574219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,4,128,1,float16,float16,0,8.5364013671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,4,128,1,fp8,fp8,0,7.318865966796875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,96,128,1,float16,fp8,0,4.562387084960937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,96,128,1,float16,float16,0,5.746590423583984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,8,128,1,float16,fp8,0,7.380738830566406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,8,128,1,fp8,fp8,0,7.434614562988282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,96,128,1,fp8,fp8,0,4.617854309082031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,1,128,1,float16,fp8,0,3.6584415435791016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,1,128,1,float16,float16,0,4.12862548828125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,1,128,1,fp8,fp8,0,3.682038497924805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,96,8,128,1,float16,float16,0,8.693726348876954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,2,128,1,float16,fp8,0,3.6732208251953127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,2,128,1,float16,float16,0,4.223448181152344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,2,128,1,fp8,fp8,0,3.670003128051758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,4,128,1,float16,fp8,0,3.771031951904297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,4,128,1,float16,float16,0,4.238025665283203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,4,128,1,fp8,fp8,0,3.6741329193115235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,8,128,1,float16,fp8,0,3.6819393157958986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,8,128,1,float16,float16,0,4.19330062866211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,96,128,1,float16,float16,0,2.7802640914916994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,96,8,128,1,fp8,fp8,0,3.725543975830078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,96,128,1,float16,fp8,0,2.3118896484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,1,128,1,float16,float16,0,2.035228729248047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,1,128,1,float16,fp8,0,1.8243471145629884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,96,128,1,fp8,fp8,0,2.3382959365844727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,1,128,1,fp8,fp8,0,1.8438192367553712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,2,128,1,float16,fp8,0,1.8330480575561523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,2,128,1,float16,float16,0,2.0433023452758787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,2,128,1,fp8,fp8,0,1.8227104187011718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,4,128,1,float16,fp8,0,1.8970224380493164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,4,128,1,fp8,fp8,0,1.8370223999023438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,4,128,1,float16,float16,0,2.062388801574707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,8,128,1,float16,float16,0,2.0985391616821287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,96,128,1,float16,float16,0,1.4070015907287599
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,96,128,1,float16,fp8,0,1.1517824172973632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,8,128,1,fp8,fp8,0,1.853540802001953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,1,128,1,float16,float16,0,1.004849624633789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,1,128,1,float16,fp8,0,0.95523681640625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,96,128,1,fp8,fp8,0,1.1589200019836425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,1,128,1,fp8,fp8,0,0.9588159561157227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,2,128,1,float16,float16,0,1.0328847885131835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,2,128,1,float16,fp8,0,0.9174703598022461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,2,128,1,fp8,fp8,0,0.9210991859436035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,4,128,1,float16,fp8,0,0.9453488349914551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,4,128,1,fp8,fp8,0,0.9249152183532715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,8,128,1,float16,float16,0,1.0465359687805176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,96,128,1,float16,float16,0,0.7007040023803711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,8,128,1,float16,fp8,0,0.9575072288513183
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,96,128,1,fp8,fp8,0,0.5852511882781982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,8,128,1,fp8,fp8,0,0.9289728164672851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,1,128,1,float16,float16,0,0.5124671936035157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,1,128,1,float16,fp8,0,0.4644576072692871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,2,128,1,float16,float16,0,0.5103903770446777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,2,128,1,float16,fp8,0,0.4641791820526123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,2,128,1,fp8,fp8,0,0.46855998039245605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,96,8,128,1,float16,fp8,0,1.8608480453491212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,4,128,1,float16,fp8,0,0.46747522354125975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,4,128,1,fp8,fp8,0,0.4666111946105957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,8,128,1,float16,float16,0,0.5261231899261475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,8,128,1,float16,fp8,0,0.47434558868408205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,8,128,1,fp8,fp8,0,0.47202720642089846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,96,128,1,float16,fp8,0,0.3008768081665039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,1,128,1,float16,float16,0,0.2611056089401245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,1,128,1,float16,fp8,0,0.23850560188293457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,1,128,1,fp8,fp8,0,0.2373055934906006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,96,4,128,1,float16,float16,0,1.0186448097229004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,2,128,1,float16,float16,0,0.26340160369873045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,2,128,1,float16,fp8,0,0.2383824110031128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,4,128,1,float16,float16,0,0.2648544073104858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,4,128,1,float16,fp8,0,0.23955841064453126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,96,128,1,float16,fp8,0,0.5888864040374756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,4,128,1,fp8,fp8,0,0.2413088083267212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,8,128,1,float16,float16,0,0.26841919422149657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,8,128,1,float16,fp8,0,0.24342401027679444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,8,128,1,fp8,fp8,0,0.24293119907379152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,96,128,1,float16,float16,0,0.18880480527877808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,1,128,1,fp8,fp8,0,0.46892480850219725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,96,128,1,fp8,fp8,0,0.15728800296783446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,1,128,1,float16,fp8,0,0.12422239780426025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,1,128,1,fp8,fp8,0,0.12383040189743041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,2,128,1,float16,float16,0,0.13750079870224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,2,128,1,float16,fp8,0,0.12415039539337158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,96,4,128,1,float16,float16,0,0.5166863918304443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,2,128,1,fp8,fp8,0,0.12466399669647217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,4,128,1,float16,float16,0,0.138646399974823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,4,128,1,float16,fp8,0,0.12418240308761597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,4,128,1,fp8,fp8,0,0.1252400040626526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,8,128,1,float16,float16,0,0.1397552013397217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,8,128,1,float16,fp8,0,0.12539360523223878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,8,128,1,fp8,fp8,0,0.1263200044631958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,96,128,1,float16,float16,0,0.35848801136016845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,96,128,1,float16,float16,0,0.09921759963035584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,96,128,1,float16,fp8,0,0.08407679796218873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,96,128,1,fp8,fp8,0,0.08417119979858398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,1,128,1,float16,fp8,0,0.06402559876441956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,1,128,1,fp8,fp8,0,0.0641871988773346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,2,128,1,float16,float16,0,0.07436800003051758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,2,128,1,float16,fp8,0,0.06448799967765809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,96,128,1,fp8,fp8,0,0.300763201713562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,2,128,1,fp8,fp8,0,0.0639136016368866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,4,128,1,float16,float16,0,0.07495520114898682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,4,128,1,float16,fp8,0,0.06467999815940857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,4,128,1,fp8,fp8,0,0.0648751974105835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,8,128,1,float16,fp8,0,0.06593760251998901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,8,128,1,fp8,fp8,0,0.06521120071411132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,96,2,128,1,fp8,fp8,0,0.23929920196533203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,1,128,1,float16,float16,0,0.13673280477523803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,1,128,1,float16,float16,0,0.07378720045089722
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,96,8,128,1,float16,float16,0,0.07613279819488525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,96,96,128,1,float16,fp8,0,0.15839200019836425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,1,128,1,float16,fp8,0,5.363241577148438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,1,128,1,float16,float16,0,5.675753784179688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,1,128,1,fp8,fp8,0,5.406582260131836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,2,128,1,float16,float16,0,5.7634223937988285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,2,128,1,float16,fp8,0,5.357374572753907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,2,128,1,fp8,fp8,0,5.352252960205078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,4,128,1,float16,float16,0,5.779569625854492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,4,128,1,float16,fp8,0,5.409558486938477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,4,128,1,fp8,fp8,0,5.447356796264648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,8,128,1,float16,float16,0,5.977804946899414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,8,128,1,float16,fp8,0,5.508900833129883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,96,128,1,float16,float16,0,4.349863815307617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,96,8,128,1,fp8,fp8,0,5.485827255249023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,1,128,1,float16,float16,0,2.835985565185547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,1,128,1,float16,fp8,0,2.7402639389038086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,96,128,1,fp8,fp8,0,3.6916446685791016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,1,128,1,fp8,fp8,0,2.672916793823242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,2,128,1,float16,float16,0,2.8694831848144533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,2,128,1,float16,fp8,0,2.7317392349243166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,2,128,1,fp8,fp8,0,2.6952768325805665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,4,128,1,float16,float16,0,2.874339294433594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,4,128,1,float16,fp8,0,2.7311279296875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,4,128,1,fp8,fp8,0,2.715247917175293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,96,128,1,float16,fp8,0,3.7138561248779296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,8,128,1,float16,float16,0,2.9624959945678713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,8,128,1,float16,fp8,0,2.7854175567626953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,96,128,1,float16,float16,0,2.1577072143554688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,96,128,1,float16,fp8,0,1.881056022644043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,96,8,128,1,fp8,fp8,0,2.7714080810546875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,1,128,1,float16,float16,0,1.4091744422912598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,96,128,1,fp8,fp8,0,1.8670175552368165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,1,128,1,float16,fp8,0,1.3613696098327637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,1,128,1,fp8,fp8,0,1.4196175575256347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,2,128,1,float16,float16,0,1.421553611755371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,2,128,1,float16,fp8,0,1.3663999557495117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,2,128,1,fp8,fp8,0,1.3603856086730957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,4,128,1,float16,fp8,0,1.3592080116271972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,4,128,1,float16,float16,0,1.4434639930725097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,4,128,1,fp8,fp8,0,1.3737551689147949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,8,128,1,float16,float16,0,1.4715904235839843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,8,128,1,float16,fp8,0,1.3978816032409669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,96,8,128,1,fp8,fp8,0,1.3885760307312012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,1,128,1,float16,float16,0,0.7272496223449707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,96,128,1,float16,float16,0,1.0918160438537599
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,96,128,1,float16,fp8,0,0.9392191886901855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,1,128,1,float16,fp8,0,0.6854256153106689
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,96,128,1,fp8,fp8,0,0.9336400032043457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,1,128,1,fp8,fp8,0,0.6848656177520752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,2,128,1,float16,float16,0,0.7139423847198486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,2,128,1,fp8,fp8,0,0.6901968002319336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,4,128,1,float16,fp8,0,0.690876817703247
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,4,128,1,float16,float16,0,0.7257520198822022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,8,128,1,float16,float16,0,0.73744797706604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,8,128,1,float16,fp8,0,0.7003615856170654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,96,128,1,float16,float16,0,0.5551296234130859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,8,128,1,fp8,fp8,0,0.6971039772033691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,1,128,1,float16,float16,0,0.3608831882476807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,96,128,1,fp8,fp8,0,0.4761695861816406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,1,128,1,float16,fp8,0,0.346942400932312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,1,128,1,fp8,fp8,0,0.3483760118484497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,2,128,1,float16,float16,0,0.36422080993652345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,2,128,1,float16,fp8,0,0.3451056003570557
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,2,128,1,fp8,fp8,0,0.3483936071395874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,4,128,1,float16,float16,0,0.3692064046859741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,4,128,1,float16,fp8,0,0.3515104055404663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,8,128,1,float16,float16,0,0.37523679733276366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,8,128,1,fp8,fp8,0,0.3556704044342041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,96,128,1,float16,float16,0,0.28521440029144285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,96,128,1,float16,fp8,0,0.2445823907852173
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,96,128,1,fp8,fp8,0,0.2436608076095581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,1,128,1,float16,float16,0,0.18825440406799315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,1,128,1,float16,fp8,0,0.17840479612350463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,1,128,1,fp8,fp8,0,0.17757279872894288
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,2,128,1,float16,float16,0,0.18914719820022582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,2,128,1,float16,fp8,0,0.178985595703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,2,128,1,fp8,fp8,0,0.17832159996032715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,4,128,1,float16,float16,0,0.1901792049407959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,4,128,1,float16,fp8,0,0.18034720420837402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,4,128,1,fp8,fp8,0,0.17989280223846435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,8,128,1,float16,float16,0,0.1944640040397644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,2,128,1,float16,fp8,0,0.6830656051635742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,8,128,1,float16,fp8,0,0.18240959644317628
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,96,128,1,float16,float16,0,0.1502560019493103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,96,4,128,1,fp8,fp8,0,0.6916319847106933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,96,128,1,float16,fp8,0,0.12843199968338012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,96,128,1,fp8,fp8,0,0.1288256049156189
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,1,128,1,float16,float16,0,0.09934080243110657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,1,128,1,fp8,fp8,0,0.09426079988479615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,2,128,1,float16,float16,0,0.09979680180549622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,1,128,1,float16,fp8,0,0.09448639750480652
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,2,128,1,float16,fp8,0,0.09491040110588074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,2,128,1,fp8,fp8,0,0.09456160068511962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,4,128,1,float16,float16,0,0.10070079565048218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,4,128,1,float16,fp8,0,0.09562079906463623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,96,128,1,float16,fp8,0,0.4761087894439697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,4,128,1,fp8,fp8,0,0.09521440267562867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,8,128,1,float16,float16,0,0.10293439626693726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,8,128,1,float16,fp8,0,0.09679679870605469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,96,128,1,float16,float16,0,0.0808896005153656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,96,128,1,fp8,fp8,0,0.07037919759750366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,96,8,128,1,fp8,fp8,0,0.09713280200958252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,1,128,1,float16,float16,0,0.054548799991607666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,1,128,1,float16,fp8,0,0.04971520006656647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,1,128,1,fp8,fp8,0,0.050337600708007815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,2,128,1,float16,float16,0,0.055030399560928346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,2,128,1,float16,fp8,0,0.05020800232887268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,2,128,1,fp8,fp8,0,0.05056800246238709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,4,128,1,float16,float16,0,0.0561519980430603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,4,128,1,float16,fp8,0,0.05061119794845581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,4,128,1,fp8,fp8,0,0.05076799988746643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,4,128,1,fp8,fp8,0,0.34898560047149657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,8,128,1,float16,float16,0,0.05792639851570129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,8,128,1,float16,fp8,0,0.05160800218582153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,8,128,1,fp8,fp8,0,0.0513152003288269
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,96,128,1,float16,fp8,0,0.040387201309204104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,96,128,1,fp8,fp8,0,0.04013440012931824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,96,8,128,1,float16,fp8,0,0.35579040050506594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,1,128,1,float16,float16,0,0.02977760136127472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,1,128,1,fp8,fp8,0,0.029120001196861266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,2,128,1,float16,float16,0,0.02966879904270172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,2,128,1,float16,fp8,0,0.029110398888587952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,2,128,1,fp8,fp8,0,0.028905600309371948
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,4,128,1,float16,float16,0,0.029865598678588866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,4,128,1,float16,fp8,0,0.028940799832344054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,4,128,1,fp8,fp8,0,0.029315200448036195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,8,128,1,float16,float16,0,0.03030880093574524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,8,128,1,float16,fp8,0,0.028999999165534973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,8,128,1,fp8,fp8,0,0.02919520139694214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,96,8,128,1,fp8,fp8,0,0.18185919523239136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,96,96,128,1,float16,fp8,0,0.0702351987361908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,1,128,1,float16,float16,0,2.1704687118530273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,1,128,1,float16,fp8,0,2.22684326171875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,1,128,1,fp8,fp8,0,2.230700874328613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,96,128,1,float16,float16,0,0.04884639978408813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,96,1,128,1,float16,fp8,0,0.02924480140209198
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,2,128,1,float16,float16,0,2.185588836669922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,2,128,1,float16,fp8,0,2.231892776489258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,2,128,1,fp8,fp8,0,2.218532752990723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,4,128,1,float16,float16,0,2.2234079360961916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,4,128,1,float16,fp8,0,2.2237600326538085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,4,128,1,fp8,fp8,0,2.222163200378418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,8,128,1,float16,float16,0,2.2787071228027345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,8,128,1,float16,fp8,0,2.2634544372558594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,1,128,1,float16,float16,0,1.0864751815795899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,96,128,1,float16,fp8,0,1.5717967987060546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,96,128,1,fp8,fp8,0,1.558407974243164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,96,128,1,float16,float16,0,1.897542381286621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,96,8,128,1,fp8,fp8,0,2.275457572937012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,1,128,1,fp8,fp8,0,1.1143263816833495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,2,128,1,float16,float16,0,1.0912896156311036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,2,128,1,float16,fp8,0,1.1228480339050293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,2,128,1,fp8,fp8,0,1.1090543746948243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,4,128,1,float16,float16,0,1.1162799835205077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,4,128,1,float16,fp8,0,1.1305264472961425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,8,128,1,float16,float16,0,1.1486751556396484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,4,128,1,fp8,fp8,0,1.1229215621948243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,8,128,1,float16,fp8,0,1.131924819946289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,96,128,1,float16,float16,0,0.9530575752258301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,8,128,1,fp8,fp8,0,1.145246410369873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,96,128,1,fp8,fp8,0,0.7934127807617187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,1,128,1,float16,float16,0,0.5541999816894532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,1,128,1,float16,fp8,0,0.562879991531372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,1,128,1,fp8,fp8,0,0.5609903812408448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,2,128,1,float16,float16,0,0.5515120029449463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,2,128,1,fp8,fp8,0,0.5681072235107422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,4,128,1,float16,float16,0,0.5651264190673828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,4,128,1,float16,fp8,0,0.5677855968475342
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,4,128,1,fp8,fp8,0,0.5655039787292481
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,8,128,1,float16,float16,0,0.5767263889312744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,8,128,1,float16,fp8,0,0.579747200012207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,8,128,1,fp8,fp8,0,0.5763440132141113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,96,128,1,float16,fp8,0,0.40076160430908203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,96,128,1,fp8,fp8,0,0.40057759284973143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,1,128,1,float16,float16,0,0.2842672109603882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,1,128,1,float16,fp8,0,0.2876336097717285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,1,128,1,fp8,fp8,0,0.2858975887298584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,2,128,1,float16,float16,0,0.28319520950317384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,2,128,1,float16,fp8,0,0.28824639320373535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,2,128,1,fp8,fp8,0,0.28965280055999754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,4,128,1,float16,float16,0,0.28802878856658937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,4,128,1,float16,fp8,0,0.2889552116394043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,4,128,1,fp8,fp8,0,0.29058399200439455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,8,128,1,float16,float16,0,0.2990000009536743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,8,128,1,fp8,fp8,0,0.29328479766845705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,96,128,1,float16,fp8,0,0.20872800350189208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,96,128,1,float16,fp8,0,0.790064001083374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,1,128,1,float16,float16,0,0.14812159538269043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,96,128,1,fp8,fp8,0,0.20715200901031494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,1,128,1,float16,fp8,0,0.1494879961013794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,1,128,1,fp8,fp8,0,0.1504591941833496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,2,128,1,float16,float16,0,0.1480512022972107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,2,128,1,float16,fp8,0,0.15024319887161255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,2,128,1,fp8,fp8,0,0.14967039823532105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,96,2,128,1,float16,fp8,0,0.5680208206176758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,96,1,128,1,float16,fp8,0,1.1143327713012696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,4,128,1,float16,float16,0,0.1503551959991455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,4,128,1,float16,fp8,0,0.15037280321121216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,4,128,1,fp8,fp8,0,0.1519376039505005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,8,128,1,float16,fp8,0,0.15306880474090576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,8,128,1,fp8,fp8,0,0.15270880460739136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,8,128,1,float16,float16,0,0.1550160050392151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,96,128,1,float16,float16,0,0.13420480489730835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,96,128,1,fp8,fp8,0,0.11052800416946411
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,96,128,1,float16,float16,0,0.4859007835388184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,1,128,1,float16,fp8,0,0.08192960023880005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,2,128,1,float16,fp8,0,0.08249120116233825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,2,128,1,float16,float16,0,0.08110719919204712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,2,128,1,fp8,fp8,0,0.08212159872055054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,4,128,1,float16,float16,0,0.0815280020236969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,4,128,1,float16,fp8,0,0.08279520273208618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,4,128,1,fp8,fp8,0,0.08221920132637024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,8,128,1,float16,float16,0,0.08390399813652039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,8,128,1,float16,fp8,0,0.08364800214767457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,8,128,1,fp8,fp8,0,0.08397600054740906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,96,128,1,float16,float16,0,0.07253760099411011
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,96,128,1,float16,fp8,0,0.06181120276451111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,96,128,1,fp8,fp8,0,0.06187199950218201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,1,128,1,float16,float16,0,0.04593600034713745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,1,128,1,float16,fp8,0,0.043556800484657286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,2,128,1,float16,float16,0,0.046055999398231504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,2,128,1,float16,fp8,0,0.043614399433135984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,2,128,1,fp8,fp8,0,0.04370720088481903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,4,128,1,float16,float16,0,0.04694879949092865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,4,128,1,float16,fp8,0,0.04405120015144348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,4,128,1,fp8,fp8,0,0.04415040016174317
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,8,128,1,float16,float16,0,0.04830079972743988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,8,128,1,float16,fp8,0,0.04466400146484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,8,128,1,fp8,fp8,0,0.04478079974651337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,96,128,1,float16,float16,0,0.04378080070018768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,96,128,1,float16,fp8,0,0.03447679877281189
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,96,128,1,fp8,fp8,0,0.034796801209449765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,1,128,1,float16,float16,0,0.024883200228214265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,1,128,1,float16,fp8,0,0.025972801446914672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,96,128,1,float16,fp8,0,0.1111024022102356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,1,128,1,float16,float16,0,0.08000959753990174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,1,128,1,fp8,fp8,0,0.02580159902572632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,96,1,128,1,fp8,fp8,0,0.08101279735565185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,2,128,1,fp8,fp8,0,0.02603999972343445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,4,128,1,float16,float16,0,0.025379198789596557
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,4,128,1,float16,fp8,0,0.02614560127258301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,4,128,1,fp8,fp8,0,0.025894400477409363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,8,128,1,float16,float16,0,0.02555840015411377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,8,128,1,float16,fp8,0,0.026499199867248534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,96,128,1,float16,float16,0,0.025596800446510314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,96,128,1,float16,fp8,0,0.021622399985790252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,96,128,1,fp8,fp8,0,0.021673600375652313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,1,128,1,float16,float16,0,0.018532800674438476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,1,128,1,float16,fp8,0,0.01728159934282303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,1,128,1,fp8,fp8,0,0.017395199835300447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,2,128,1,float16,float16,0,0.018464000523090364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,96,8,128,1,float16,fp8,0,0.2927824020385742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,2,128,1,float16,fp8,0,0.01759520024061203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,2,128,1,fp8,fp8,0,0.01748639941215515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,96,96,128,1,float16,float16,0,0.2508431911468506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,4,128,1,float16,fp8,0,0.017451199889183044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,8,128,1,float16,float16,0,0.018719999492168425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,4,128,1,fp8,fp8,0,0.01751199960708618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,8,128,1,float16,fp8,0,0.01770080029964447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,8,128,1,fp8,fp8,0,0.017484800517559053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,1,128,1,float16,float16,0,1.0011311531066895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,1,128,1,float16,fp8,0,1.0653792381286622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,2,128,1,float16,fp8,0,0.026067200303077697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,8,128,1,fp8,fp8,0,0.025966399908065797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,1,128,1,fp8,fp8,0,1.060579204559326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,2,128,1,float16,fp8,0,1.0677935600280761
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,96,4,128,1,float16,float16,0,0.018721599876880646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,4,128,1,float16,float16,0,1.0295071601867676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,4,128,1,float16,fp8,0,1.070695972442627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,4,128,1,fp8,fp8,0,1.0740480422973633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,8,128,1,float16,float16,0,1.0568783760070801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,8,128,1,float16,fp8,0,1.1020015716552733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,96,2,128,1,float16,float16,0,0.025383999943733214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,96,128,1,float16,float16,0,0.9004863739013672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,8,128,1,fp8,fp8,0,1.097275161743164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,96,128,1,float16,fp8,0,0.7946959972381592
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,1,128,1,float16,float16,0,0.5096159934997558
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,96,128,1,fp8,fp8,0,0.7934720039367675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,1,128,1,float16,fp8,0,0.5368912220001221
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,1,128,1,fp8,fp8,0,0.5353184223175049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,2,128,1,float16,float16,0,0.5116367816925049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,2,128,1,float16,fp8,0,0.538972806930542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,2,128,1,fp8,fp8,0,0.5372576236724853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,4,128,1,float16,float16,0,0.5181856155395508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,2,128,1,fp8,fp8,0,1.073140811920166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,4,128,1,fp8,fp8,0,0.5418655872344971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,8,128,1,float16,float16,0,0.5351103782653809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,8,128,1,float16,fp8,0,0.5551280021667481
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,8,128,1,fp8,fp8,0,0.5550479888916016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,96,128,1,float16,float16,0,0.46041121482849123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,1,128,1,float16,float16,0,0.26143999099731446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,96,128,1,float16,fp8,0,0.4010176181793213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,96,128,1,fp8,fp8,0,0.401043176651001
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,1,128,1,float16,fp8,0,0.2755743980407715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,1,128,1,fp8,fp8,0,0.27623040676116944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,2,128,1,float16,float16,0,0.26202878952026365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,2,128,1,float16,fp8,0,0.2744175910949707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,2,128,1,fp8,fp8,0,0.2757920026779175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,4,128,1,float16,float16,0,0.26729280948638917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,4,128,1,float16,fp8,0,0.27818880081176756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,8,128,1,float16,float16,0,0.27434558868408204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,8,128,1,float16,fp8,0,0.2854111909866333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,8,128,1,fp8,fp8,0,0.2853615999221802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,96,128,1,float16,float16,0,0.23728959560394286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,96,128,1,float16,fp8,0,0.20819520950317383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,96,128,1,fp8,fp8,0,0.2080751895904541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,1,128,1,float16,fp8,0,0.14327679872512816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,96,2,128,1,float16,float16,0,1.01255521774292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,2,128,1,float16,float16,0,0.13797279596328735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,2,128,1,float16,fp8,0,0.14401439428329468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,2,128,1,fp8,fp8,0,0.14301760196685792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,96,4,128,1,float16,fp8,0,0.5424255847930908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,4,128,1,fp8,fp8,0,0.14448000192642213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,8,128,1,float16,float16,0,0.1441167950630188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,8,128,1,float16,fp8,0,0.14680960178375244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,8,128,1,fp8,fp8,0,0.14630240201950073
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,96,128,1,float16,float16,0,0.12894400358200073
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,96,128,1,float16,fp8,0,0.10884959697723388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,96,4,128,1,fp8,fp8,0,0.2768064022064209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,96,128,1,fp8,fp8,0,0.10873119831085205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,1,128,1,float16,float16,0,0.07533599734306336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,1,128,1,float16,fp8,0,0.07737600207328796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,1,128,1,fp8,fp8,0,0.07637119889259339
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,2,128,1,float16,float16,0,0.07586240172386169
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,2,128,1,float16,fp8,0,0.07689120173454285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,2,128,1,fp8,fp8,0,0.0773904025554657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,4,128,1,float16,float16,0,0.07571200132369996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,4,128,1,float16,fp8,0,0.07767840027809143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,1,128,1,float16,float16,0,0.13697119951248168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,4,128,1,fp8,fp8,0,0.07736480236053467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,8,128,1,float16,float16,0,0.07932159900665284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,1,128,1,fp8,fp8,0,0.1438447952270508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,8,128,1,float16,fp8,0,0.07900480031967164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,96,128,1,float16,float16,0,0.06970080137252807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,1,128,1,float16,float16,0,0.04251520037651062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,96,128,1,fp8,fp8,0,0.0583728015422821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,1,128,1,float16,fp8,0,0.03884960114955902
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,2,128,1,float16,float16,0,0.04267520010471344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,2,128,1,float16,fp8,0,0.039087998867034915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,2,128,1,fp8,fp8,0,0.0386927992105484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,4,128,1,float16,float16,0,0.04323199987411499
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,4,128,1,float16,fp8,0,0.03912160098552704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,4,128,1,fp8,fp8,0,0.03943040072917938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,8,128,1,float16,float16,0,0.044228801131248476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,8,128,1,float16,fp8,0,0.04046080112457275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,8,128,1,fp8,fp8,0,0.0407696008682251
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,96,128,1,float16,float16,0,0.04198560118675232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,96,128,1,float16,fp8,0,0.030003198981285097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,96,128,1,fp8,fp8,0,0.03083840012550354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,1,128,1,float16,float16,0,0.0218639999628067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,1,128,1,float16,fp8,0,0.022881600260734557
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,1,128,1,fp8,fp8,0,0.022758400440216063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,2,128,1,float16,float16,0,0.02218399941921234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,2,128,1,fp8,fp8,0,0.022830399870872497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,4,128,1,float16,float16,0,0.022363199293613432
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,4,128,1,float16,fp8,0,0.022976000607013703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,4,128,1,fp8,fp8,0,0.023052799701690673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,8,128,1,float16,float16,0,0.02272319942712784
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,8,128,1,float16,fp8,0,0.023012800514698027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,8,128,1,fp8,fp8,0,0.02295999974012375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,96,128,1,float16,float16,0,0.024663999676704407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,96,128,1,float16,fp8,0,0.019499200582504272
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,96,128,1,fp8,fp8,0,0.019508799910545348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,1,128,1,float16,float16,0,0.016993600130081176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,1,128,1,float16,fp8,0,0.017564800381660462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,1,128,1,fp8,fp8,0,0.017528000473976135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,96,1,128,1,fp8,fp8,0,0.04366079866886139
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,96,8,128,1,fp8,fp8,0,0.07851679921150208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,2,128,1,float16,float16,0,0.016913600265979767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,2,128,1,fp8,fp8,0,0.017556799948215483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,4,128,1,float16,float16,0,0.017183999717235564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,4,128,1,float16,fp8,0,0.01748960018157959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,4,128,1,fp8,fp8,0,0.017657600343227386
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,8,128,1,float16,float16,0,0.01732639968395233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,8,128,1,float16,fp8,0,0.017740799486637114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,8,128,1,fp8,fp8,0,0.017552000284194947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,4,128,1,float16,float16,0,0.13998559713363648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,96,128,1,float16,float16,0,0.015300799906253815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,96,128,1,float16,fp8,0,0.014129599928855896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,96,128,1,fp8,fp8,0,0.013859200477600097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,1,128,1,float16,fp8,0,0.012724800407886505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,2,128,1,float16,float16,0,0.012307199835777282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,2,128,1,float16,fp8,0,0.012723200023174286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,2,128,1,fp8,fp8,0,0.012827199697494508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,4,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,4,128,1,float16,fp8,0,0.012755200266838074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,4,128,1,fp8,fp8,0,0.012588800489902496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,96,2,128,1,float16,fp8,0,0.022720000147819518
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,8,128,1,float16,float16,0,0.012600000202655792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,8,128,1,float16,fp8,0,0.012639999389648438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,1,128,1,float16,float16,0,0.5119152069091797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,96,2,128,1,float16,fp8,0,0.017387199401855468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,1,128,1,fp8,fp8,0,0.038808000087738034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,96,4,128,1,float16,fp8,0,0.1434991955757141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,1,128,1,float16,float16,0,0.012167999893426895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,1,128,1,float16,fp8,0,0.5363279819488526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,1,128,1,fp8,fp8,0,0.01276479959487915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,1,128,1,fp8,fp8,0,0.5355648040771485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,2,128,1,float16,float16,0,0.5130224227905273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,2,128,1,float16,fp8,0,0.5367775917053222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,4,128,1,float16,float16,0,0.5226687908172607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,4,128,1,float16,fp8,0,0.5402448177337646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,96,8,128,1,fp8,fp8,0,0.012572799623012543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,4,128,1,fp8,fp8,0,0.5422287940979004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,8,128,1,float16,float16,0,0.5370863914489746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,8,128,1,float16,fp8,0,0.5571135997772216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,8,128,1,fp8,fp8,0,0.5532479763031006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,96,128,1,float16,float16,0,0.5372176170349121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,1,128,1,float16,float16,0,0.26064798831939695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,96,128,1,fp8,fp8,0,0.4365856170654297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,1,128,1,float16,fp8,0,0.2746272087097168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,1,128,1,fp8,fp8,0,0.27413120269775393
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,2,128,1,float16,float16,0,0.262608003616333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,2,128,1,float16,fp8,0,0.27450559139251707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,2,128,1,fp8,fp8,0,0.27593441009521485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,4,128,1,float16,float16,0,0.2673520088195801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,4,128,1,float16,fp8,0,0.2779808044433594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,4,128,1,fp8,fp8,0,0.2759552001953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,8,128,1,float16,float16,0,0.27364320755004884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,8,128,1,float16,fp8,0,0.28419039249420164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,8,128,1,fp8,fp8,0,0.2840240001678467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,96,128,1,float16,float16,0,0.27544000148773196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,96,128,1,float16,fp8,0,0.22545280456542968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,96,128,1,fp8,fp8,0,0.22481119632720947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,1,128,1,float16,float16,0,0.13883839845657348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,1,128,1,float16,fp8,0,0.14144480228424072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,1,128,1,fp8,fp8,0,0.14166239500045777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,2,128,1,float16,float16,0,0.13936159610748292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,2,128,1,float16,fp8,0,0.14459680318832396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,2,128,1,fp8,fp8,0,0.14299360513687134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,4,128,1,float16,float16,0,0.13956480026245116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,4,128,1,float16,fp8,0,0.14233280420303346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,4,128,1,fp8,fp8,0,0.1443711996078491
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,8,128,1,float16,float16,0,0.1446720004081726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,8,128,1,float16,fp8,0,0.14602240324020385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,96,8,128,1,fp8,fp8,0,0.14991359710693358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,96,128,1,float16,fp8,0,0.11900800466537476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,96,2,128,1,fp8,fp8,0,0.5372576236724853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,96,128,1,fp8,fp8,0,0.11888320446014404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,1,128,1,float16,float16,0,0.0755952000617981
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,1,128,1,fp8,fp8,0,0.0774671971797943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,2,128,1,float16,fp8,0,0.07691519856452941
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,2,128,1,fp8,fp8,0,0.07785760164260865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,4,128,1,float16,float16,0,0.0776528000831604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,4,128,1,float16,fp8,0,0.07774080038070678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,4,128,1,fp8,fp8,0,0.07762240171432495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,8,128,1,float16,float16,0,0.07919840216636657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,96,96,128,1,float16,fp8,0,0.43934078216552735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,8,128,1,fp8,fp8,0,0.07967680096626281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,96,128,1,float16,fp8,0,0.062041598558425906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,96,128,1,fp8,fp8,0,0.06222079992294312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,1,128,1,float16,float16,0,0.042847999930381776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,1,128,1,float16,fp8,0,0.03922080099582672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,1,128,1,fp8,fp8,0,0.03926720023155213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,2,128,1,float16,float16,0,0.04288640022277832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,2,128,1,float16,fp8,0,0.03923999965190887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,96,96,128,1,float16,fp8,0,0.0583728015422821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,2,128,1,fp8,fp8,0,0.0393312007188797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,4,128,1,float16,float16,0,0.04350079894065857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,4,128,1,float16,fp8,0,0.03980799913406372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,4,128,1,fp8,fp8,0,0.03966239988803864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,8,128,1,float16,float16,0,0.04481599926948547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,8,128,1,float16,fp8,0,0.04074560105800629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,8,128,1,fp8,fp8,0,0.04118080139160156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,96,128,1,float16,float16,0,0.0459663987159729
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,1,128,1,float16,float16,0,0.021852800250053407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,1,128,1,float16,fp8,0,0.02287680059671402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,1,128,1,fp8,fp8,0,0.022888000309467315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,2,128,1,float16,float16,0,0.021940800547599792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,2,128,1,float16,fp8,0,0.02268960028886795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,2,128,1,fp8,fp8,0,0.02269600033760071
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,1,128,1,float16,fp8,0,0.07705600261688232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,4,128,1,float16,float16,0,0.02192160040140152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,4,128,1,float16,fp8,0,0.02292319983243942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,2,128,1,float16,float16,0,0.07678560018539429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,8,128,1,float16,float16,0,0.022339199483394623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,8,128,1,float16,fp8,0,0.02288320064544678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,8,128,1,fp8,fp8,0,0.023052799701690673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,96,128,1,float16,float16,0,0.02446399927139282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,96,128,1,float16,fp8,0,0.01765599995851517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,96,128,1,fp8,fp8,0,0.017768000066280366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,1,128,1,float16,float16,0,0.01669919937849045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,1,128,1,float16,fp8,0,0.017455999553203583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,8,128,1,float16,fp8,0,0.07949600219726563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,2,128,1,float16,float16,0,0.016806399822235106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,2,128,1,float16,fp8,0,0.01727679967880249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,2,128,1,fp8,fp8,0,0.0174127995967865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,4,128,1,float16,float16,0,0.01679999977350235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,4,128,1,float16,fp8,0,0.017422400414943695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,96,96,128,1,float16,float16,0,0.07735520005226135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,8,128,1,float16,float16,0,0.016942399740219116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,8,128,1,float16,fp8,0,0.017582400143146514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,8,128,1,fp8,fp8,0,0.017470400035381316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,96,128,1,float16,float16,0,0.015727999806404113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,96,128,1,float16,fp8,0,0.014129599928855896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,96,128,1,fp8,fp8,0,0.014312000572681427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,1,128,1,float16,float16,0,0.01218079999089241
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,1,128,1,float16,fp8,0,0.012936000525951386
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,1,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,2,128,1,float16,float16,0,0.012044800072908401
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,2,128,1,float16,fp8,0,0.01247519999742508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,2,128,1,fp8,fp8,0,0.012555199861526489
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,4,128,1,float16,float16,0,0.012241599708795547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,96,128,1,float16,fp8,0,0.031774398684501645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,96,128,1,fp8,fp8,0,0.031839999556541446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,4,128,1,fp8,fp8,0,0.012703999876976013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,8,128,1,float16,float16,0,0.01239679977297783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,8,128,1,float16,fp8,0,0.012571200728416443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,8,128,1,fp8,fp8,0,0.012809599936008453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,96,128,1,float16,float16,0,0.012707200646400452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,96,128,1,float16,fp8,0,0.01148959994316101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,96,96,128,1,float16,float16,0,0.14572160243988036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,1,128,1,float16,fp8,0,0.012531200051307678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,96,4,128,1,fp8,fp8,0,0.0227743998169899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,1,128,1,fp8,fp8,0,0.012451200187206269
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,2,128,1,float16,float16,0,0.011985599994659424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,2,128,1,float16,fp8,0,0.012430399656295776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,2,128,1,fp8,fp8,0,0.012441600114107132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,4,128,1,float16,float16,0,0.012051200121641159
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,4,128,1,fp8,fp8,0,0.012406399846076966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,4,128,1,float16,fp8,0,0.0124719999730587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,8,128,1,float16,float16,0,0.01202239990234375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,1,128,1,fp8,fp8,0,0.017382399737834932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,8,128,1,float16,fp8,0,0.012438400089740754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,8,128,1,fp8,fp8,0,0.012377600371837615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,96,4,128,1,fp8,fp8,0,0.017417599260807038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,1,128,1,float16,float16,0,0.2615855932235718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,1,128,1,float16,fp8,0,0.2764575958251953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,1,128,1,fp8,fp8,0,0.27322399616241455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,2,128,1,float16,float16,0,0.2638576030731201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,2,128,1,float16,fp8,0,0.27523040771484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,2,128,1,fp8,fp8,0,0.27519679069519043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,4,128,1,float16,float16,0,0.2665231943130493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,4,128,1,fp8,fp8,0,0.2781584024429321
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,8,128,1,float16,float16,0,0.2741008043289185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,8,128,1,float16,fp8,0,0.2835968017578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,8,128,1,fp8,fp8,0,0.2840368032455444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,96,128,1,float16,float16,0,0.3926431894302368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,96,4,128,1,float16,fp8,0,0.012801599502563477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,96,128,1,fp8,fp8,0,0.3245311975479126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,1,128,1,float16,float16,0,0.13815040588378907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,1,128,1,float16,fp8,0,0.14244799613952636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,1,128,1,fp8,fp8,0,0.14384640455245973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,2,128,1,float16,float16,0,0.13941119909286498
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,96,128,1,fp8,fp8,0,0.011521600186824799
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,2,128,1,float16,fp8,0,0.14250559806823732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,96,1,128,1,float16,float16,0,0.012191999703645706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,4,128,1,float16,float16,0,0.14175839424133302
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,4,128,1,float16,fp8,0,0.14373279809951783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,4,128,1,fp8,fp8,0,0.1433743953704834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,8,128,1,float16,float16,0,0.143339204788208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,8,128,1,float16,fp8,0,0.14895039796829224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,8,128,1,fp8,fp8,0,0.1471567988395691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,96,128,1,float16,float16,0,0.20525760650634767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,96,128,1,float16,fp8,0,0.16977280378341675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,96,128,1,fp8,fp8,0,0.1696768045425415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,1,128,1,float16,fp8,0,0.07825599908828736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,1,128,1,fp8,fp8,0,0.07804480195045471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,2,128,1,float16,float16,0,0.07722880244255066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,2,128,1,float16,fp8,0,0.07802559733390808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,2,128,1,fp8,fp8,0,0.07870240211486816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,4,128,1,float16,float16,0,0.07814559936523438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,4,128,1,float16,fp8,0,0.07903839945793152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,8,128,1,float16,float16,0,0.08001120090484619
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,8,128,1,float16,fp8,0,0.08061760067939758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,8,128,1,fp8,fp8,0,0.08051040172576904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,96,128,1,float16,float16,0,0.107750403881073
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,96,128,1,float16,fp8,0,0.08992480039596558
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,96,128,1,fp8,fp8,0,0.08967199921607971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,1,128,1,float16,fp8,0,0.0390720009803772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,96,4,128,1,float16,fp8,0,0.27849440574645995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,1,128,1,fp8,fp8,0,0.039022400975227356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,2,128,1,float16,float16,0,0.04313920140266418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,2,128,1,fp8,fp8,0,0.039017599821090695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,4,128,1,float16,float16,0,0.04330880045890808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,4,128,1,float16,fp8,0,0.03931359946727753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,96,128,1,float16,fp8,0,0.3222640037536621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,4,128,1,fp8,fp8,0,0.03939040005207062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,8,128,1,float16,float16,0,0.04437119960784912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,8,128,1,float16,fp8,0,0.04127840101718903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,96,128,1,float16,float16,0,0.06050080060958862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,96,128,1,float16,fp8,0,0.046907201409339905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,96,128,1,fp8,fp8,0,0.04532159864902496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,1,128,1,float16,float16,0,0.021784000098705292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,1,128,1,float16,fp8,0,0.022891199588775633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,1,128,1,fp8,fp8,0,0.022702400386333466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,96,2,128,1,fp8,fp8,0,0.14300800561904908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,2,128,1,float16,fp8,0,0.02263839989900589
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,4,128,1,float16,float16,0,0.022044800221920013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,4,128,1,float16,fp8,0,0.02281759977340698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,4,128,1,fp8,fp8,0,0.022838400304317476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,1,128,1,float16,float16,0,0.07765759825706482
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,8,128,1,float16,float16,0,0.022172799706459044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,8,128,1,float16,fp8,0,0.022728000581264497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,8,128,1,fp8,fp8,0,0.022676800191402436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,96,4,128,1,fp8,fp8,0,0.07916640043258667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,96,128,1,float16,float16,0,0.030329599976539612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,96,128,1,float16,fp8,0,0.02473759949207306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,96,128,1,fp8,fp8,0,0.02433760017156601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,1,128,1,float16,float16,0,0.01690720021724701
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,1,128,1,float16,fp8,0,0.017608000338077544
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,1,128,1,fp8,fp8,0,0.01746080070734024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,2,128,1,float16,float16,0,0.016921600699424742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,2,128,1,float16,fp8,0,0.017478400468826295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,2,128,1,fp8,fp8,0,0.017476800084114074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,4,128,1,float16,float16,0,0.01696320027112961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,4,128,1,float16,fp8,0,0.01759839951992035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,4,128,1,fp8,fp8,0,0.017315199971199034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,8,128,1,float16,float16,0,0.017217600345611574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,8,128,1,float16,fp8,0,0.017401599884033205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,96,8,128,1,fp8,fp8,0,0.01751199960708618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,96,128,1,float16,float16,0,0.018113599717617036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,96,128,1,float16,fp8,0,0.016598400473594666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,96,128,1,fp8,fp8,0,0.016603200137615202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,2,128,1,float16,fp8,0,0.03932639956474304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,1,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,1,128,1,fp8,fp8,0,0.012622399628162384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,2,128,1,float16,fp8,0,0.012675200402736665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,2,128,1,fp8,fp8,0,0.01247360035777092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,4,128,1,float16,float16,0,0.01204639971256256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,8,128,1,fp8,fp8,0,0.04063520133495331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,4,128,1,fp8,fp8,0,0.012443199753761292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,8,128,1,float16,float16,0,0.012308800220489502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,8,128,1,float16,fp8,0,0.012600000202655792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,2,128,1,float16,float16,0,0.02220959961414337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,8,128,1,fp8,fp8,0,0.012628799676895142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,96,128,1,float16,float16,0,0.015227200090885162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,96,2,128,1,fp8,fp8,0,0.022785599529743194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,96,128,1,float16,fp8,0,0.013713599741458892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,96,128,1,fp8,fp8,0,0.013654400408267976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,1,128,1,float16,float16,0,0.01215839982032776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,1,128,1,float16,fp8,0,0.012379200011491776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,1,128,1,fp8,fp8,0,0.012587200105190276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,2,128,1,float16,fp8,0,0.012379200011491776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,2,128,1,fp8,fp8,0,0.012408000230789185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,4,128,1,float16,float16,0,0.012030400335788727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,4,128,1,float16,fp8,0,0.01242239996790886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,8,128,1,float16,float16,0,0.012062399834394454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,8,128,1,float16,fp8,0,0.012299200147390365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,8,128,1,fp8,fp8,0,0.012459199875593185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,96,128,1,float16,float16,0,0.012689599394798278
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,96,128,1,float16,fp8,0,0.011224000155925751
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,96,128,1,fp8,fp8,0,0.011358399689197541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,1,128,1,float16,float16,0,0.01196800023317337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,1,128,1,float16,fp8,0,0.012300799787044524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,96,1,128,1,float16,float16,0,0.042844799160957334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,1,128,1,fp8,fp8,0,0.012265600264072418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,2,128,1,float16,fp8,0,0.012296000123023986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,2,128,1,fp8,fp8,0,0.012247999757528305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,4,128,1,float16,float16,0,0.0118928000330925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,4,128,1,float16,fp8,0,0.012334399670362473
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,1,128,1,float16,float16,0,0.012198399752378464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,4,128,1,fp8,fp8,0,0.012454400211572647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,8,128,1,float16,float16,0,0.011979199945926666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,2,128,1,float16,float16,0,0.012328000366687774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,8,128,1,fp8,fp8,0,0.012542399764060973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,1,128,1,float16,float16,0,0.1384592056274414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,1,128,1,float16,fp8,0,0.1435487985610962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,96,4,128,1,float16,fp8,0,0.012678399682044983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,1,128,1,fp8,fp8,0,0.14369599819183348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,2,128,1,float16,float16,0,0.13885600566864015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,2,128,1,fp8,fp8,0,0.1450368046760559
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,4,128,1,float16,float16,0,0.14059200286865234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,4,128,1,float16,fp8,0,0.1457520008087158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,4,128,1,fp8,fp8,0,0.14463039636611938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,2,128,1,float16,float16,0,0.012135999649763108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,8,128,1,float16,float16,0,0.17186399698257446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,96,4,128,1,fp8,fp8,0,0.012352000176906585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,8,128,1,float16,fp8,0,0.17853440046310426
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,8,128,1,fp8,fp8,0,0.17843519449234008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,96,128,1,float16,float16,0,0.3330127954483032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,1,128,1,float16,float16,0,0.07707520127296448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,96,128,1,float16,fp8,0,0.28124160766601564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,96,128,1,fp8,fp8,0,0.2825344085693359
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,1,128,1,fp8,fp8,0,0.07820640206336975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,2,128,1,float16,float16,0,0.07595040202140808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,2,128,1,fp8,fp8,0,0.07882559895515442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,2,128,1,float16,fp8,0,0.07936639785766601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,4,128,1,float16,float16,0,0.07728639841079712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,4,128,1,fp8,fp8,0,0.07993599772453308
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,8,128,1,float16,float16,0,0.09258080124855042
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,2,128,1,float16,float16,0,0.011902400106191636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,8,128,1,fp8,fp8,0,0.09673280119895936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,96,128,1,float16,float16,0,0.1707487940788269
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,96,128,1,float16,fp8,0,0.14593440294265747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,96,128,1,fp8,fp8,0,0.146451199054718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,96,8,128,1,float16,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,1,128,1,fp8,fp8,0,0.0391072005033493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,2,128,1,float16,float16,0,0.043433600664138795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,2,128,1,float16,fp8,0,0.03922399878501892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,2,128,1,fp8,fp8,0,0.03967519998550415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,4,128,1,float16,float16,0,0.04467839896678925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,4,128,1,float16,fp8,0,0.04002079963684082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,96,2,128,1,float16,fp8,0,0.14287519454956055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,4,128,1,fp8,fp8,0,0.03991360068321228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,8,128,1,float16,float16,0,0.052344000339508055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,8,128,1,float16,fp8,0,0.0498879998922348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,8,128,1,fp8,fp8,0,0.049660798907279965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,96,128,1,float16,float16,0,0.09369919896125793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,96,128,1,float16,fp8,0,0.07472479939460755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,96,128,1,fp8,fp8,0,0.07531840205192566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,1,128,1,float16,float16,0,0.0220880001783371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,1,128,1,float16,fp8,0,0.023155200481414794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,1,128,1,fp8,fp8,0,0.022993600368499754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,2,128,1,float16,float16,0,0.022124800086021423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,2,128,1,float16,fp8,0,0.022836799919605254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,2,128,1,fp8,fp8,0,0.023123200237751006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,4,128,1,float16,float16,0,0.022148799896240235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,4,128,1,float16,fp8,0,0.0230880007147789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,4,128,1,fp8,fp8,0,0.02295520007610321
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,8,128,1,float16,float16,0,0.02584480047225952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,8,128,1,float16,fp8,0,0.02715199887752533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,96,8,128,1,fp8,fp8,0,0.027244800329208375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,96,128,1,float16,float16,0,0.047249600291252136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,96,128,1,float16,fp8,0,0.03914720118045807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,96,128,1,fp8,fp8,0,0.039297598600387576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,1,128,1,float16,float16,0,0.01679680049419403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,1,128,1,float16,fp8,0,0.01746080070734024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,1,128,1,fp8,fp8,0,0.01740480065345764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,2,128,1,float16,float16,0,0.016857600212097167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,2,128,1,float16,fp8,0,0.017467199265956877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,2,128,1,fp8,fp8,0,0.01743520051240921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,8,128,1,float16,fp8,0,0.09700160026550293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,4,128,1,float16,fp8,0,0.017417599260807038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,4,128,1,fp8,fp8,0,0.017510400712490083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,8,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,8,128,1,float16,fp8,0,0.017569600045681
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,8,128,1,fp8,fp8,0,0.01733279973268509
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,96,128,1,float16,float16,0,0.02584159970283508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,96,128,1,float16,fp8,0,0.02359360009431839
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,96,128,1,fp8,fp8,0,0.023664000630378722
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,1,128,1,float16,fp8,0,0.039259201288223265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,1,128,1,float16,fp8,0,0.012670400738716125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,1,128,1,fp8,fp8,0,0.01271200031042099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,2,128,1,float16,fp8,0,0.012691199779510498
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,2,128,1,fp8,fp8,0,0.01273919939994812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,4,128,1,float16,float16,0,0.012249600142240524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,4,128,1,float16,fp8,0,0.012664000689983367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,4,128,1,fp8,fp8,0,0.012708799540996551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,8,128,1,float16,float16,0,0.012595200538635254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,8,128,1,float16,fp8,0,0.012758399546146392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,8,128,1,fp8,fp8,0,0.012745599448680877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,96,128,1,float16,float16,0,0.017803199589252472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,96,128,1,float16,fp8,0,0.016331200301647187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,96,128,1,fp8,fp8,0,0.016358399391174318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,1,128,1,float16,float16,0,0.011948800086975098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,1,128,1,float16,fp8,0,0.012352000176906585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,1,128,1,fp8,fp8,0,0.01231039986014366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,2,128,1,float16,float16,0,0.011966399848461151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,2,128,1,float16,fp8,0,0.01239359974861145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,1,128,1,float16,fp8,0,0.07882879972457886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,2,128,1,fp8,fp8,0,0.012467200309038163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,4,128,1,float16,float16,0,0.012195199728012085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,4,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,4,128,1,fp8,fp8,0,0.012486399710178375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,8,128,1,float16,float16,0,0.011876799911260606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,8,128,1,float16,fp8,0,0.012444800138473511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,96,128,1,float16,float16,0,0.01467519998550415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,96,4,128,1,float16,float16,0,0.017000000178813934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,96,128,1,float16,fp8,0,0.013369600474834441
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,96,128,1,fp8,fp8,0,0.01350879967212677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,1,128,1,float16,fp8,0,0.012355200201272964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,96,1,128,1,float16,float16,0,0.04307039976119995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,1,128,1,fp8,fp8,0,0.012336000055074691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,2,128,1,float16,float16,0,0.011902400106191636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,1,128,1,float16,float16,0,0.012409599870443344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,2,128,1,fp8,fp8,0,0.012188799679279327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,4,128,1,float16,float16,0,0.011902400106191636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,96,2,128,1,float16,float16,0,0.012296000123023986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,4,128,1,fp8,fp8,0,0.0122079998254776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,8,128,1,float16,float16,0,0.01178399994969368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,8,128,1,float16,fp8,0,0.012121599912643433
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,8,128,1,fp8,fp8,0,0.012300799787044524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,96,128,1,float16,fp8,0,0.01135680004954338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,96,128,1,fp8,fp8,0,0.011339200288057327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,1,128,1,float16,float16,0,0.011811199784278869
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,1,128,1,float16,fp8,0,0.011972799897193909
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,1,128,1,fp8,fp8,0,0.011974400281906128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,2,128,1,float16,float16,0,0.011828800290822982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,2,128,1,float16,fp8,0,0.012111999839544297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,2,128,1,fp8,fp8,0,0.012115199863910676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,4,128,1,float16,float16,0,0.01189119964838028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,4,128,1,float16,fp8,0,0.0121568001806736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,4,128,1,fp8,fp8,0,0.012051200121641159
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,8,128,1,float16,float16,0,0.011665599793195725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,8,128,1,float16,fp8,0,0.012049599736928939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,8,128,1,fp8,fp8,0,0.011924800276756287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,1,128,1,float16,float16,0,0.020559999346733093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,1,128,1,float16,fp8,0,0.021766400337219237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,1,128,1,fp8,fp8,0,0.021862399578094483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,2,128,1,float16,float16,0,0.02385119944810867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,96,4,128,1,float16,fp8,0,0.07904639840126038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,96,8,128,1,fp8,fp8,0,0.012270399928092956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,1,128,1,float16,float16,0,0.011790399998426437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,4,128,1,float16,float16,0,0.034948799014091494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,4,128,1,float16,fp8,0,0.034836798906326294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,2,128,1,float16,fp8,0,0.012223999947309494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,4,128,1,fp8,fp8,0,0.03488479852676392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,96,4,128,1,float16,fp8,0,0.012390399724245072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,8,128,1,float16,fp8,0,0.05634239912033081
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,8,128,1,fp8,fp8,0,0.05594720244407654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,96,96,128,1,float16,float16,0,0.012217599898576736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,96,128,1,float16,float16,0,0.240012788772583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,96,128,1,float16,fp8,0,0.2304624080657959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,1,128,1,float16,float16,0,0.01440960019826889
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,1,128,1,float16,fp8,0,0.015025599300861359
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,1,128,1,fp8,fp8,0,0.014982399344444276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,2,128,1,float16,float16,0,0.015535999834537507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,2,128,1,float16,fp8,0,0.015331199765205384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,2,128,1,fp8,fp8,0,0.01549919992685318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,4,128,1,float16,float16,0,0.021087999641895293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,4,128,1,float16,fp8,0,0.021155199408531188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,4,128,1,fp8,fp8,0,0.021054400503635405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,8,128,1,float16,float16,0,0.03248960077762604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,8,128,1,float16,fp8,0,0.03184640109539032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,8,128,1,fp8,fp8,0,0.03167999982833862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,96,128,1,float16,float16,0,0.1256432056427002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,96,128,1,float16,fp8,0,0.1208799958229065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,1,128,1,float16,float16,0,0.012886400520801543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,96,128,1,fp8,fp8,0,0.12138079404830933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,1,128,1,float16,fp8,0,0.01340160071849823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,1,128,1,fp8,fp8,0,0.013361600041389466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,2,128,1,float16,float16,0,0.011475200206041336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,2,128,1,float16,fp8,0,0.011791999638080596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,4,128,1,float16,float16,0,0.0141184002161026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,4,128,1,float16,fp8,0,0.014228799939155578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,4,128,1,fp8,fp8,0,0.013993600010871887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,8,128,1,float16,float16,0,0.019894400238990785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,8,128,1,float16,fp8,0,0.01940159946680069
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,8,128,1,fp8,fp8,0,0.01964640021324158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,96,128,1,float16,float16,0,0.0681119978427887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,96,128,1,float16,fp8,0,0.06600000262260437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,1,128,1,float16,float16,0,0.01252480000257492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,96,128,1,fp8,fp8,0,0.06589440107345582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,2,128,1,float16,fp8,0,0.023814399540424348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,1,128,1,fp8,fp8,0,0.012580800056457519
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,2,128,1,float16,float16,0,0.010777600109577179
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,2,128,1,float16,fp8,0,0.010718400031328202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,2,128,1,fp8,fp8,0,0.023520000278949738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,4,128,1,float16,float16,0,0.010955200344324113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,4,128,1,fp8,fp8,0,0.010804799944162368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,8,128,1,float16,float16,0,0.013337600231170654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,8,128,1,float16,fp8,0,0.013155199587345123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,8,128,1,fp8,fp8,0,0.013176000118255616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,96,8,128,1,float16,float16,0,0.05620480179786682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,96,128,1,float16,float16,0,0.03910239934921265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,96,128,1,float16,fp8,0,0.03789600133895874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,96,128,1,fp8,fp8,0,0.03804959952831268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,1,128,1,float16,float16,0,0.011404799669981003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,1,128,1,float16,fp8,0,0.011928000301122666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,1,128,1,fp8,fp8,0,0.011736000329256058
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,2,128,1,float16,float16,0,0.010025600343942643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,2,128,1,float16,fp8,0,0.009787199646234512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,2,128,1,fp8,fp8,0,0.01000479981303215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,4,128,1,float16,float16,0,0.010089600086212158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,4,128,1,float16,fp8,0,0.010014399886131287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,4,128,1,fp8,fp8,0,0.01011200025677681
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,8,128,1,float16,fp8,0,0.010208000242710114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,8,128,1,fp8,fp8,0,0.010211200267076493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,96,128,1,float16,float16,0,0.02513760030269623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,96,128,1,float16,fp8,0,0.02328319996595383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,96,128,1,fp8,fp8,0,0.023179200291633607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,1,128,1,float16,float16,0,0.011519999802112579
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,1,128,1,float16,fp8,0,0.011886399984359742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,1,128,1,fp8,fp8,0,0.012080000340938568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,2,128,1,float16,float16,0,0.010121600329875946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,2,128,1,float16,fp8,0,0.009811200201511383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,96,2,128,1,fp8,fp8,0,0.011048000305891037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,2,128,1,fp8,fp8,0,0.009758400171995163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,4,128,1,float16,fp8,0,0.009960000216960908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,4,128,1,fp8,fp8,0,0.009939199686050415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,8,128,1,float16,float16,0,0.010038399696350097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,8,128,1,float16,fp8,0,0.00995360016822815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,8,128,1,fp8,fp8,0,0.010124800354242324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,96,128,1,float16,float16,0,0.01737920045852661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,1,128,1,float16,fp8,0,0.012934400141239167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,96,128,1,float16,fp8,0,0.016014400124549865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,96,128,1,fp8,fp8,0,0.01600639969110489
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,1,128,1,float16,float16,0,0.011150400340557098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,2,128,1,fp8,fp8,0,0.010689599812030793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,1,128,1,fp8,fp8,0,0.011555200070142746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,2,128,1,float16,fp8,0,0.009495999664068222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,2,128,1,fp8,fp8,0,0.00966079980134964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,4,128,1,float16,fp8,0,0.009662400186061858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,4,128,1,fp8,fp8,0,0.009662400186061858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,8,128,1,float16,float16,0,0.00984639972448349
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,8,128,1,float16,fp8,0,0.009865599870681762
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,8,128,1,fp8,fp8,0,0.009908799827098847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,96,128,1,float16,float16,0,0.01480800062417984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,96,128,1,float16,fp8,0,0.013417600095272065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,96,128,1,fp8,fp8,0,0.013364799320697784
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,96,96,128,1,fp8,fp8,0,0.23016641139984131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,96,8,128,1,float16,float16,0,0.010238400101661682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,1,128,1,float16,float16,0,0.011009600013494492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,1,128,1,float16,fp8,0,0.0114656001329422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,1,128,1,fp8,fp8,0,0.011350400000810622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,2,128,1,float16,float16,0,0.009480000287294389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,2,128,1,float16,fp8,0,0.009388799965381622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,2,128,1,fp8,fp8,0,0.009353599697351455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,4,128,1,float16,float16,0,0.009600000083446502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,4,128,1,float16,fp8,0,0.009385599941015243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,4,128,1,fp8,fp8,0,0.009427200257778167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,8,128,1,float16,float16,0,0.009636799991130828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,8,128,1,float16,fp8,0,0.009654399752616883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,96,128,1,float16,float16,0,0.012129600346088409
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,96,128,1,float16,fp8,0,0.01096159964799881
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,96,128,1,fp8,fp8,0,0.010875199735164643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,1,128,1,float16,float16,0,0.01091040000319481
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,96,4,128,1,float16,float16,0,0.009956800192594529
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,1,128,1,float16,fp8,0,0.01149279996752739
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,2,128,1,float16,float16,0,0.009544000029563904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,1,128,1,fp8,fp8,0,0.011401599645614624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,2,128,1,float16,fp8,0,0.009419199824333192
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,2,128,1,fp8,fp8,0,0.00939679965376854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,4,128,1,float16,float16,0,0.009596800059080124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,4,128,1,float16,fp8,0,0.00939520001411438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,4,128,1,fp8,fp8,0,0.009419199824333192
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,8,128,1,float16,float16,0,0.00968480035662651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,8,128,1,fp8,fp8,0,0.009518399834632874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,1,128,1,float16,fp8,0,0.011507199704647064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,96,4,128,1,float16,fp8,0,0.010753600299358368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,4,128,1,float16,float16,0,0.00981760025024414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,96,8,128,1,fp8,fp8,0,0.009583999961614608
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,96,8,128,1,float16,fp8,0,0.00936800017952919
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,96,2,128,1,float16,float16,0,0.009849599748849868
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,1,128,1,fp8,fp8,0,48.00136108398438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,1,128,1,float16,fp8,0,49.254025268554685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,2,128,1,float16,fp8,0,48.312075805664065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,2,128,1,fp8,fp8,0,48.8191162109375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,4,128,1,float16,fp8,0,48.065328979492186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,1,128,1,float16,float16,0,61.42056884765625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,2,128,1,float16,float16,0,61.924835205078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,4,128,1,float16,float16,0,61.44142456054688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,1,128,1,float16,float16,0,30.80172119140625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,4,128,1,fp8,fp8,0,47.98065185546875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,8,128,1,float16,fp8,0,48.671548461914064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,8,128,1,fp8,fp8,0,49.2578369140625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,64,8,128,1,float16,float16,0,63.142791748046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,1,128,1,float16,fp8,0,24.03826446533203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,1,128,1,fp8,fp8,0,23.96378173828125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,2,128,1,float16,fp8,0,24.70709991455078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,2,128,1,float16,float16,0,31.061370849609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,2,128,1,fp8,fp8,0,24.34931640625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,4,128,1,float16,fp8,0,23.866574096679688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,4,128,1,fp8,fp8,0,24.3817626953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,4,128,1,float16,float16,0,30.848199462890626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,8,128,1,float16,float16,0,31.204959106445312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,8,128,1,float16,fp8,0,24.996351623535155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,8,128,1,fp8,fp8,0,24.208404541015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,1,128,1,float16,float16,0,15.553459167480469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,1,128,1,float16,fp8,0,11.816054534912109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,1,128,1,fp8,fp8,0,12.1691650390625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,2,128,1,float16,float16,0,15.243382263183594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,2,128,1,float16,fp8,0,12.246348571777343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,2,128,1,fp8,fp8,0,11.935004425048827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,4,128,1,float16,float16,0,15.988343811035156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,4,128,1,float16,fp8,0,11.857782745361328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,4,128,1,fp8,fp8,0,12.126251220703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,8,128,1,float16,float16,0,15.630555725097656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,8,128,1,float16,fp8,0,12.234121704101563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,64,128,1,float16,float16,0,15.780953979492187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,8,128,1,fp8,fp8,0,11.845043182373047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,64,128,1,fp8,fp8,0,12.231059265136718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,64,128,1,float16,float16,0,8.22841796875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,64,64,128,1,float16,fp8,0,12.289236450195313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,64,128,1,float16,fp8,0,6.200947189331055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,64,128,1,fp8,fp8,0,6.088483047485352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,1,128,1,fp8,fp8,0,5.876156616210937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,1,128,1,float16,fp8,0,6.19202880859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,1,128,1,float16,float16,0,7.423136138916016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,64,128,1,float16,float16,0,30.890597534179687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,64,128,1,float16,fp8,0,24.734713745117187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,2,128,1,float16,fp8,0,5.880120086669922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,2,128,1,float16,float16,0,7.567066955566406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,2,128,1,fp8,fp8,0,5.869408035278321
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,4,128,1,float16,fp8,0,5.965636825561523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,4,128,1,float16,float16,0,7.880280303955078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,4,128,1,fp8,fp8,0,6.332369613647461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,64,64,128,1,fp8,fp8,0,24.907752990722656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,8,128,1,float16,fp8,0,5.999488067626953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,8,128,1,fp8,fp8,0,6.522901153564453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,64,8,128,1,float16,float16,0,7.540764617919922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,1,128,1,float16,fp8,0,27.7940673828125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,1,128,1,fp8,fp8,0,27.988973999023436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,2,128,1,fp8,fp8,0,27.260418701171876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,2,128,1,float16,fp8,0,28.449221801757812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,4,128,1,float16,fp8,0,27.473117065429687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,1,128,1,float16,float16,0,35.30845642089844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,2,128,1,float16,float16,0,34.23554992675781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,4,128,1,float16,float16,0,35.309417724609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,64,128,1,float16,fp8,0,14.211306762695312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,64,128,1,float16,float16,0,19.076332092285156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,64,128,1,fp8,fp8,0,14.20296630859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,4,128,1,fp8,fp8,0,27.676956176757812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,1,128,1,float16,float16,0,17.527317810058594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,8,128,1,float16,fp8,0,27.73052062988281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,8,128,1,fp8,fp8,0,28.397540283203124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,1,128,1,float16,fp8,0,13.564299011230469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,1,128,1,fp8,fp8,0,13.446966552734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,64,8,128,1,float16,float16,0,35.966552734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,2,128,1,float16,fp8,0,13.74755859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,2,128,1,fp8,fp8,0,13.473722839355469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,2,128,1,float16,float16,0,17.455110168457033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,4,128,1,float16,fp8,0,13.817507934570312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,4,128,1,float16,float16,0,17.21292724609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,4,128,1,fp8,fp8,0,13.664566040039062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,8,128,1,float16,fp8,0,14.012921142578126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,64,128,1,float16,fp8,0,7.093550109863282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,64,128,1,fp8,fp8,0,7.171214294433594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,64,128,1,float16,float16,0,9.183014678955079
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,8,128,1,float16,float16,0,17.948886108398437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,1,128,1,float16,float16,0,8.491241455078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,64,8,128,1,fp8,fp8,0,14.075160217285156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,1,128,1,float16,fp8,0,6.951006317138672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,1,128,1,fp8,fp8,0,6.896924591064453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,2,128,1,float16,fp8,0,7.063433837890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,2,128,1,fp8,fp8,0,6.793536376953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,2,128,1,float16,float16,0,8.955452728271485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,4,128,1,float16,fp8,0,6.802337646484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,4,128,1,float16,float16,0,8.625417327880859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,4,128,1,fp8,fp8,0,7.016883087158203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,64,128,1,float16,fp8,0,3.6321983337402344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,64,128,1,float16,float16,0,4.57165298461914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,8,128,1,float16,fp8,0,6.969305419921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,64,128,1,fp8,fp8,0,3.6372001647949217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,8,128,1,float16,float16,0,9.27891845703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,64,8,128,1,fp8,fp8,0,7.188326263427735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,1,128,1,float16,fp8,0,3.45880012512207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,1,128,1,float16,float16,0,4.149550247192383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,1,128,1,fp8,fp8,0,3.357088088989258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,2,128,1,float16,fp8,0,3.466089630126953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,2,128,1,float16,float16,0,4.259833526611328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,2,128,1,fp8,fp8,0,3.3776432037353517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,4,128,1,float16,fp8,0,3.429431915283203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,4,128,1,fp8,fp8,0,3.397809600830078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,4,128,1,float16,float16,0,4.247795104980469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,8,128,1,float16,fp8,0,3.377920150756836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,8,128,1,float16,float16,0,4.219646453857422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,64,8,128,1,fp8,fp8,0,3.5292320251464844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,1,128,1,float16,fp8,0,18.85785369873047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,1,128,1,fp8,fp8,0,19.38416748046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,2,128,1,float16,fp8,0,19.16272430419922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,2,128,1,fp8,fp8,0,19.210098266601562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,4,128,1,float16,fp8,0,19.72620544433594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,1,128,1,float16,float16,0,24.127587890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,2,128,1,float16,float16,0,24.817001342773438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,4,128,1,float16,float16,0,24.752146911621093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,64,128,1,float16,fp8,0,9.942880249023437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,64,128,1,float16,float16,0,13.395167541503906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,64,128,1,fp8,fp8,0,10.280308532714844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,1,128,1,float16,float16,0,11.919513702392578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,4,128,1,fp8,fp8,0,19.422787475585938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,8,128,1,float16,fp8,0,19.872843933105468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,8,128,1,fp8,fp8,0,19.596719360351564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,1,128,1,float16,fp8,0,9.434852600097656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,64,8,128,1,float16,float16,0,25.63358154296875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,1,128,1,fp8,fp8,0,9.644644927978515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,2,128,1,float16,fp8,0,9.46872787475586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,2,128,1,fp8,fp8,0,9.459194946289063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,2,128,1,float16,float16,0,12.338166046142579
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,4,128,1,float16,fp8,0,9.614511871337891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,4,128,1,float16,float16,0,12.40259017944336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,4,128,1,fp8,fp8,0,9.487859344482422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,64,128,1,float16,fp8,0,5.074732971191406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,64,128,1,float16,float16,0,6.438772583007813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,64,128,1,fp8,fp8,0,5.041062545776367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,8,128,1,float16,fp8,0,9.885739135742188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,8,128,1,float16,float16,0,12.346993255615235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,64,8,128,1,fp8,fp8,0,9.777471923828125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,1,128,1,float16,fp8,0,4.8453422546386715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,1,128,1,fp8,fp8,0,4.769392013549805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,1,128,1,float16,float16,0,5.9688880920410154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,2,128,1,float16,fp8,0,4.769684982299805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,2,128,1,float16,float16,0,6.0382640838623045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,4,128,1,float16,fp8,0,4.832400131225586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,4,128,1,fp8,fp8,0,4.866158294677734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,8,128,1,float16,fp8,0,4.8339599609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,4,128,1,float16,float16,0,6.193707275390625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,64,128,1,float16,float16,0,3.2124881744384766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,2,128,1,fp8,fp8,0,4.840107345581055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,8,128,1,float16,float16,0,6.507899475097656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,64,8,128,1,fp8,fp8,0,4.860673522949218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,64,128,1,float16,fp8,0,2.4917743682861326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,64,128,1,fp8,fp8,0,2.4903440475463867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,1,128,1,float16,fp8,0,2.545561599731445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,1,128,1,float16,float16,0,2.9358959197998047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,1,128,1,fp8,fp8,0,2.4744800567626952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,2,128,1,float16,float16,0,2.8172767639160154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,2,128,1,float16,fp8,0,2.508540725708008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,2,128,1,fp8,fp8,0,2.409552001953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,4,128,1,float16,fp8,0,2.344220733642578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,4,128,1,float16,float16,0,2.9320192337036133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,4,128,1,fp8,fp8,0,2.407828712463379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,8,128,1,float16,fp8,0,2.3589391708374023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,8,128,1,float16,float16,0,3.0968511581420897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,64,8,128,1,fp8,fp8,0,2.5887632369995117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,1,128,1,float16,fp8,0,24.933303833007812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,1,128,1,fp8,fp8,0,24.830487060546876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,2,128,1,float16,fp8,0,24.678599548339843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,2,128,1,fp8,fp8,0,25.127862548828126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,1,128,1,float16,float16,0,31.93645324707031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,2,128,1,float16,float16,0,32.1186767578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,4,128,1,float16,fp8,0,24.47499237060547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,4,128,1,float16,float16,0,32.689910888671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,64,128,1,float16,fp8,0,13.393473815917968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,64,128,1,fp8,fp8,0,13.60946044921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,64,128,1,float16,float16,0,17.13842315673828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,4,128,1,fp8,fp8,0,25.192213439941405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,1,128,1,float16,float16,0,15.913644409179687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,8,128,1,float16,fp8,0,25.163623046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,8,128,1,fp8,fp8,0,25.436009216308594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,64,8,128,1,float16,float16,0,31.900491333007814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,1,128,1,float16,fp8,0,12.585955047607422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,1,128,1,fp8,fp8,0,12.268560028076172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,2,128,1,float16,fp8,0,12.466577911376953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,2,128,1,fp8,fp8,0,12.54105453491211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,2,128,1,float16,float16,0,16.075137329101562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,4,128,1,float16,fp8,0,12.582536315917968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,4,128,1,float16,float16,0,15.876776123046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,4,128,1,fp8,fp8,0,12.372727966308593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,64,128,1,float16,fp8,0,6.738211059570313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,64,128,1,fp8,fp8,0,6.6387168884277346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,8,128,1,float16,fp8,0,12.317272186279297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,8,128,1,fp8,fp8,0,12.752670288085938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,64,128,1,float16,float16,0,9.081999969482421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,64,8,128,1,float16,float16,0,16.961268615722656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,1,128,1,fp8,fp8,0,6.262196731567383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,2,128,1,float16,fp8,0,6.183358383178711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,1,128,1,float16,float16,0,7.864641571044922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,1,128,1,float16,fp8,0,6.2013904571533205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,2,128,1,float16,float16,0,8.176334381103516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,2,128,1,fp8,fp8,0,6.198128128051758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,4,128,1,float16,fp8,0,6.28222885131836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,4,128,1,float16,float16,0,7.820655822753906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,4,128,1,fp8,fp8,0,6.253392028808594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,64,128,1,float16,float16,0,4.233822250366211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,64,128,1,float16,fp8,0,3.303575897216797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,8,128,1,float16,fp8,0,6.075969696044922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,8,128,1,fp8,fp8,0,6.10286865234375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,64,128,1,fp8,fp8,0,3.306719970703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,64,8,128,1,float16,float16,0,8.410155487060546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,1,128,1,float16,fp8,0,3.0742160797119142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,1,128,1,float16,float16,0,3.8824447631835937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,1,128,1,fp8,fp8,0,3.2124881744384766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,2,128,1,float16,fp8,0,3.0504703521728516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,2,128,1,float16,float16,0,3.9023536682128905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,2,128,1,fp8,fp8,0,3.0733760833740233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,4,128,1,float16,float16,0,3.7395118713378905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,4,128,1,float16,fp8,0,3.414883041381836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,4,128,1,fp8,fp8,0,3.132307243347168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,64,128,1,float16,fp8,0,1.6190303802490233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,64,128,1,float16,float16,0,2.0976703643798826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,8,128,1,float16,fp8,0,3.0810016632080077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,8,128,1,fp8,fp8,0,3.0564800262451173
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,64,8,128,1,float16,float16,0,3.9560943603515626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,64,128,1,fp8,fp8,0,1.8101152420043944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,1,128,1,float16,float16,0,1.7938079833984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,1,128,1,float16,fp8,0,1.5440544128417968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,2,128,1,float16,fp8,0,1.5416048049926758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,2,128,1,fp8,fp8,0,1.5290752410888673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,2,128,1,float16,float16,0,1.9121519088745118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,4,128,1,float16,fp8,0,1.5672127723693847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,4,128,1,fp8,fp8,0,1.635686492919922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,8,128,1,float16,float16,0,1.7782175064086914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,8,128,1,float16,fp8,0,1.5237232208251954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,8,128,1,fp8,fp8,0,1.5137375831604003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,1,128,1,fp8,fp8,0,1.5114735603332519
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,64,4,128,1,float16,float16,0,1.9655807495117188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,1,128,1,fp8,fp8,0,14.147607421875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,1,128,1,float16,fp8,0,14.772450256347657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,2,128,1,float16,fp8,0,14.386219787597657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,2,128,1,fp8,fp8,0,14.125953674316406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,1,128,1,float16,float16,0,18.285255432128906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,4,128,1,float16,fp8,0,14.457603454589844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,2,128,1,float16,float16,0,18.762310791015626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,4,128,1,float16,float16,0,17.924514770507812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,64,128,1,float16,fp8,0,7.76305923461914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,64,128,1,fp8,fp8,0,7.7414192199707035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,64,128,1,float16,float16,0,9.97681121826172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,4,128,1,fp8,fp8,0,14.203543090820313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,1,128,1,float16,float16,0,8.830931091308594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,8,128,1,float16,fp8,0,14.690577697753906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,8,128,1,fp8,fp8,0,14.574862670898437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,64,8,128,1,float16,float16,0,18.811495971679687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,1,128,1,float16,fp8,0,7.208128356933594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,1,128,1,fp8,fp8,0,7.212924957275391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,2,128,1,float16,fp8,0,7.310523223876953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,2,128,1,fp8,fp8,0,7.313231658935547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,2,128,1,float16,float16,0,9.110717010498046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,4,128,1,float16,fp8,0,7.139899444580078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,4,128,1,float16,float16,0,9.380812835693359
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,4,128,1,fp8,fp8,0,7.274587249755859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,64,128,1,float16,fp8,0,3.878433609008789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,64,128,1,fp8,fp8,0,3.7774593353271486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,8,128,1,float16,fp8,0,7.282036590576172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,8,128,1,fp8,fp8,0,7.145452880859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,64,8,128,1,float16,float16,0,10.186411285400391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,1,128,1,float16,fp8,0,3.5256832122802733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,1,128,1,fp8,fp8,0,3.5004161834716796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,64,128,1,float16,float16,0,5.0018047332763675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,1,128,1,float16,float16,0,4.449940872192383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,2,128,1,float16,float16,0,4.674107360839844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,2,128,1,float16,fp8,0,3.5216800689697267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,2,128,1,fp8,fp8,0,3.728750228881836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,4,128,1,float16,fp8,0,3.517095947265625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,4,128,1,fp8,fp8,0,3.4842784881591795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,4,128,1,float16,float16,0,4.505007934570313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,64,128,1,float16,float16,0,2.3659807205200196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,8,128,1,float16,float16,0,4.693320083618164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,8,128,1,float16,fp8,0,3.7238510131835936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,64,128,1,fp8,fp8,0,1.8911472320556642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,64,8,128,1,fp8,fp8,0,3.5719966888427734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,1,128,1,float16,float16,0,2.0992431640625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,1,128,1,float16,fp8,0,1.9053264617919923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,1,128,1,fp8,fp8,0,1.9465120315551758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,2,128,1,float16,fp8,0,1.7695104598999023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,2,128,1,fp8,fp8,0,1.7939056396484374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,4,128,1,fp8,fp8,0,1.7611120223999024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,4,128,1,float16,float16,0,2.1063936233520506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,64,128,1,float16,fp8,0,1.0692095756530762
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,64,128,1,float16,float16,0,1.1417471885681152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,64,128,1,float16,fp8,0,1.8640703201293944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,8,128,1,float16,float16,0,2.16297607421875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,8,128,1,fp8,fp8,0,1.761591911315918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,64,128,1,fp8,fp8,0,0.9808655738830566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,1,128,1,float16,float16,0,1.0604415893554688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,1,128,1,float16,fp8,0,0.8745632171630859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,1,128,1,fp8,fp8,0,0.9975968360900879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,2,128,1,float16,float16,0,2.3492895126342774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,2,128,1,float16,fp8,0,0.8888544082641602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,2,128,1,fp8,fp8,0,0.9681056022644043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,4,128,1,float16,fp8,0,1.917915153503418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,4,128,1,float16,fp8,0,0.9060352325439454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,4,128,1,fp8,fp8,0,0.8801664352416992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,8,128,1,float16,float16,0,1.014417552947998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,8,128,1,float16,fp8,0,0.9139039993286133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,8,128,1,fp8,fp8,0,0.8772432327270507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,64,8,128,1,float16,fp8,0,1.8281984329223633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,2,128,1,float16,float16,0,1.026369571685791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,64,4,128,1,float16,float16,0,1.0171567916870117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,1,128,1,fp8,fp8,0,13.20923309326172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,2,128,1,fp8,fp8,0,13.447737121582032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,1,128,1,float16,float16,0,16.941830444335938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,4,128,1,float16,fp8,0,13.277952575683594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,2,128,1,float16,float16,0,17.158570861816408
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,1,128,1,float16,fp8,0,13.538104248046874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,2,128,1,float16,fp8,0,13.243772888183594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,4,128,1,float16,float16,0,16.823265075683594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,64,128,1,float16,fp8,0,7.392740631103516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,64,128,1,fp8,fp8,0,7.256547546386718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,64,128,1,float16,float16,0,9.517362976074219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,1,128,1,float16,float16,0,8.257353973388671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,4,128,1,fp8,fp8,0,13.240829467773438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,8,128,1,fp8,fp8,0,13.46455078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,8,128,1,float16,float16,0,17.118376159667967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,1,128,1,fp8,fp8,0,6.552870178222657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,1,128,1,float16,fp8,0,6.710447692871094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,64,8,128,1,float16,fp8,0,14.004603576660156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,2,128,1,float16,fp8,0,6.590974426269531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,2,128,1,fp8,fp8,0,6.571148681640625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,2,128,1,float16,float16,0,8.34563980102539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,4,128,1,fp8,fp8,0,6.610371398925781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,4,128,1,float16,fp8,0,7.2989356994628904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,4,128,1,float16,float16,0,8.901399993896485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,64,128,1,float16,float16,0,4.710974502563476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,64,128,1,float16,fp8,0,3.6776718139648437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,8,128,1,float16,float16,0,8.531632232666016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,8,128,1,float16,fp8,0,6.670317077636719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,64,8,128,1,fp8,fp8,0,6.738091278076172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,64,128,1,fp8,fp8,0,3.604017639160156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,1,128,1,fp8,fp8,0,3.3375728607177733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,2,128,1,float16,float16,0,4.055316925048828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,2,128,1,fp8,fp8,0,3.3115070343017576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,4,128,1,float16,float16,0,4.089059066772461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,4,128,1,float16,fp8,0,3.2616737365722654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,4,128,1,fp8,fp8,0,3.2561038970947265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,1,128,1,float16,fp8,0,3.59996337890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,8,128,1,float16,fp8,0,3.357270431518555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,2,128,1,float16,fp8,0,3.383884811401367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,8,128,1,float16,float16,0,4.208342361450195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,64,128,1,float16,float16,0,2.1402847290039064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,64,128,1,float16,fp8,0,1.8187103271484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,8,128,1,fp8,fp8,0,3.4147056579589843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,64,128,1,fp8,fp8,0,1.7893280029296874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,1,128,1,float16,fp8,0,1.6284303665161133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,1,128,1,float16,float16,0,1.977841567993164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,1,128,1,fp8,fp8,0,1.7931663513183593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,2,128,1,float16,fp8,0,1.6902719497680665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,2,128,1,fp8,fp8,0,1.6458112716674804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,64,1,128,1,float16,float16,0,4.254699325561523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,4,128,1,float16,float16,0,1.8907167434692382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,4,128,1,float16,fp8,0,1.6323232650756836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,64,128,1,float16,float16,0,1.0891839981079101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,4,128,1,fp8,fp8,0,1.754852867126465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,8,128,1,float16,fp8,0,1.6422111511230468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,8,128,1,float16,float16,0,2.0296703338623048
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,8,128,1,fp8,fp8,0,1.6323104858398438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,64,128,1,float16,fp8,0,0.9165871620178223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,64,128,1,fp8,fp8,0,0.8981216430664063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,1,128,1,float16,fp8,0,0.8181504249572754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,1,128,1,fp8,fp8,0,0.9491456031799317
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,2,128,1,float16,float16,0,0.9548111915588379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,2,128,1,float16,fp8,0,0.8193344116210938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,2,128,1,fp8,fp8,0,0.8152912139892579
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,4,128,1,float16,float16,0,0.9510288238525391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,64,2,128,1,float16,float16,0,2.0229423522949217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,4,128,1,float16,fp8,0,0.8724880218505859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,4,128,1,fp8,fp8,0,0.8480095863342285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,8,128,1,float16,float16,0,0.9569631576538086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,8,128,1,float16,fp8,0,0.8239551544189453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,64,128,1,float16,fp8,0,0.47203521728515624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,64,128,1,float16,float16,0,0.5385776042938233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,64,128,1,fp8,fp8,0,0.4613999843597412
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,8,128,1,fp8,fp8,0,0.8455792427062988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,1,128,1,float16,float16,0,0.4800096035003662
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,1,128,1,float16,fp8,0,0.41669278144836425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,1,128,1,fp8,fp8,0,0.42143998146057127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,2,128,1,float16,float16,0,0.475600004196167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,2,128,1,float16,fp8,0,0.4160895824432373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,2,128,1,fp8,fp8,0,0.41910557746887206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,4,128,1,float16,fp8,0,0.41723041534423827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,4,128,1,float16,float16,0,0.4905231952667236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,4,128,1,fp8,fp8,0,0.41809282302856443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,8,128,1,float16,fp8,0,0.41614398956298826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,8,128,1,float16,float16,0,0.4804255962371826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,64,8,128,1,fp8,fp8,0,0.41980800628662107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,64,1,128,1,float16,float16,0,0.952939224243164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,1,128,1,fp8,fp8,0,7.782132720947265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,1,128,1,float16,fp8,0,7.889100646972656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,2,128,1,fp8,fp8,0,7.750819396972656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,2,128,1,float16,fp8,0,7.778107452392578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,4,128,1,float16,fp8,0,7.772345733642578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,1,128,1,float16,float16,0,9.834575653076172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,2,128,1,float16,float16,0,9.896915435791016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,4,128,1,float16,float16,0,9.956774139404297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,64,128,1,float16,fp8,0,4.462140655517578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,64,128,1,fp8,fp8,0,4.426558303833008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,64,128,1,float16,float16,0,5.587668609619141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,1,128,1,float16,float16,0,4.887223815917968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,4,128,1,fp8,fp8,0,7.839518737792969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,8,128,1,float16,fp8,0,7.7962593078613285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,8,128,1,float16,float16,0,10.073907470703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,1,128,1,float16,fp8,0,3.884368133544922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,1,128,1,fp8,fp8,0,3.8365264892578126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,2,128,1,float16,fp8,0,3.9821807861328127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,64,8,128,1,fp8,fp8,0,7.835931396484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,2,128,1,fp8,fp8,0,3.872689437866211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,2,128,1,float16,float16,0,4.833774566650391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,4,128,1,float16,float16,0,4.870251083374024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,4,128,1,float16,fp8,0,3.9812881469726564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,64,128,1,float16,fp8,0,2.1836368560791017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,4,128,1,fp8,fp8,0,3.8976062774658202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,64,128,1,float16,float16,0,2.7432655334472655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,8,128,1,float16,fp8,0,3.9515407562255858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,64,128,1,fp8,fp8,0,2.2479583740234377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,8,128,1,fp8,fp8,0,3.8722496032714844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,1,128,1,float16,fp8,0,1.9452768325805665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,1,128,1,fp8,fp8,0,1.9506559371948242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,1,128,1,float16,float16,0,2.2937055587768556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,2,128,1,float16,float16,0,2.3963136672973633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,2,128,1,float16,fp8,0,1.9506591796875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,2,128,1,fp8,fp8,0,1.9276304244995117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,4,128,1,float16,float16,0,2.318315124511719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,4,128,1,float16,fp8,0,1.9262208938598633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,4,128,1,fp8,fp8,0,1.9244255065917968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,64,8,128,1,float16,float16,0,4.87848014831543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,8,128,1,float16,float16,0,2.626937675476074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,64,128,1,float16,float16,0,1.29518404006958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,64,128,1,float16,fp8,0,1.1664719581604004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,8,128,1,float16,fp8,0,2.030819129943848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,64,128,1,fp8,fp8,0,1.1059760093688964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,1,128,1,float16,float16,0,1.1632927894592284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,64,8,128,1,fp8,fp8,0,1.9248559951782227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,1,128,1,float16,fp8,0,0.9735919952392578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,1,128,1,fp8,fp8,0,1.2301775932312011
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,2,128,1,float16,fp8,0,0.9866607666015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,2,128,1,fp8,fp8,0,0.9856240272521972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,2,128,1,float16,float16,0,1.1307984352111817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,4,128,1,float16,float16,0,1.1261712074279786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,4,128,1,fp8,fp8,0,0.9960592269897461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,8,128,1,float16,float16,0,1.1207951545715331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,64,128,1,float16,float16,0,0.6655759811401367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,64,128,1,fp8,fp8,0,0.5939871788024902
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,1,128,1,float16,float16,0,0.5659599781036377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,8,128,1,float16,fp8,0,1.0743904113769531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,8,128,1,fp8,fp8,0,0.9776096343994141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,1,128,1,float16,fp8,0,0.5257008075714111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,2,128,1,float16,float16,0,0.5779215812683105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,2,128,1,fp8,fp8,0,0.5397039890289307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,2,128,1,float16,fp8,0,0.48924641609191893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,4,128,1,float16,float16,0,0.5607071876525879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,4,128,1,float16,fp8,0,0.5053167819976807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,4,128,1,fp8,fp8,0,0.5089744091033935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,8,128,1,float16,fp8,0,0.4903535842895508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,8,128,1,float16,float16,0,0.5963871955871582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,64,128,1,float16,float16,0,0.3418992042541504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,64,128,1,float16,fp8,0,0.27972960472106934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,64,128,1,fp8,fp8,0,0.29373118877410886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,1,128,1,float16,float16,0,0.2890383958816528
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,1,128,1,fp8,fp8,0,0.2571791887283325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,2,128,1,float16,float16,0,0.288372802734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,2,128,1,float16,fp8,0,0.25552480220794677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,2,128,1,fp8,fp8,0,0.25173919200897216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,64,4,128,1,float16,fp8,0,1.0951680183410644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,4,128,1,float16,float16,0,0.29136641025543214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,4,128,1,float16,fp8,0,0.25300800800323486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,4,128,1,fp8,fp8,0,0.24965438842773438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,8,128,1,float16,float16,0,0.290775990486145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,64,128,1,float16,fp8,0,0.5983935832977295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,8,128,1,float16,fp8,0,0.25345919132232664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,8,128,1,fp8,fp8,0,0.2511919975280762
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,1,128,1,fp8,fp8,0,0.4954671859741211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,64,8,128,1,fp8,fp8,0,0.507697582244873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,64,1,128,1,float16,fp8,0,0.24774720668792724
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,1,128,1,float16,fp8,0,7.507254028320313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,1,128,1,fp8,fp8,0,7.537153625488282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,2,128,1,float16,fp8,0,7.6206817626953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,1,128,1,float16,float16,0,9.516958618164063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,2,128,1,float16,float16,0,9.445462036132813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,2,128,1,fp8,fp8,0,7.513003540039063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,4,128,1,float16,float16,0,9.551238250732421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,4,128,1,float16,fp8,0,7.4965087890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,64,128,1,float16,fp8,0,4.417817687988281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,64,128,1,float16,float16,0,5.679439926147461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,4,128,1,fp8,fp8,0,7.491961669921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,8,128,1,float16,fp8,0,7.661078643798828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,64,128,1,fp8,fp8,0,4.498863983154297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,8,128,1,fp8,fp8,0,7.784990692138672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,64,8,128,1,float16,float16,0,9.788410949707032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,1,128,1,float16,fp8,0,3.763294219970703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,1,128,1,float16,float16,0,4.4712158203125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,1,128,1,fp8,fp8,0,3.862441635131836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,2,128,1,float16,fp8,0,3.759312057495117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,2,128,1,float16,float16,0,4.548855972290039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,4,128,1,fp8,fp8,0,3.7509281158447267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,2,128,1,fp8,fp8,0,3.746867370605469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,64,128,1,float16,float16,0,2.7399887084960937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,8,128,1,float16,fp8,0,3.8095729827880858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,4,128,1,float16,float16,0,4.586067199707031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,8,128,1,float16,float16,0,4.651134490966797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,8,128,1,fp8,fp8,0,3.8012176513671876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,64,128,1,float16,fp8,0,2.194590377807617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,64,128,1,fp8,fp8,0,2.1977615356445312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,1,128,1,float16,float16,0,2.244985580444336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,1,128,1,float16,fp8,0,1.898031997680664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,1,128,1,fp8,fp8,0,1.9796928405761718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,2,128,1,float16,fp8,0,1.8949024200439453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,2,128,1,float16,float16,0,2.2638431549072267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,64,4,128,1,float16,fp8,0,3.7783329010009767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,2,128,1,fp8,fp8,0,1.892732810974121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,4,128,1,fp8,fp8,0,1.8926015853881837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,4,128,1,float16,float16,0,2.280019187927246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,64,128,1,float16,float16,0,1.3157440185546876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,8,128,1,float16,fp8,0,1.8790592193603515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,8,128,1,float16,float16,0,2.3014848709106444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,8,128,1,fp8,fp8,0,1.980620765686035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,64,128,1,float16,fp8,0,1.1064751625061036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,64,128,1,fp8,fp8,0,1.0981311798095703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,1,128,1,float16,float16,0,1.1524527549743653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,1,128,1,float16,fp8,0,0.9449119567871094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,2,128,1,float16,fp8,0,0.9563471794128418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,2,128,1,float16,float16,0,1.1937631607055663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,2,128,1,fp8,fp8,0,0.9455039978027344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,4,128,1,float16,float16,0,1.0929424285888671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,4,128,1,float16,fp8,0,1.0084367752075196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,4,128,1,fp8,fp8,0,0.9451760292053223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,8,128,1,float16,float16,0,1.0960800170898437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,8,128,1,fp8,fp8,0,0.9535375595092773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,8,128,1,float16,fp8,0,1.0409392356872558
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,64,128,1,float16,fp8,0,0.5736559867858887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,64,128,1,fp8,fp8,0,0.5607264041900635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,64,128,1,float16,float16,0,0.7019008159637451
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,64,4,128,1,float16,fp8,0,1.9327199935913086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,1,128,1,float16,float16,0,0.5716991901397706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,1,128,1,fp8,fp8,0,0.4962736129760742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,2,128,1,float16,fp8,0,0.4787919998168945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,2,128,1,fp8,fp8,0,0.5152976036071777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,4,128,1,float16,float16,0,0.5479983806610107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,4,128,1,float16,fp8,0,0.4984367847442627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,8,128,1,float16,float16,0,0.5563744068145752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,8,128,1,float16,fp8,0,0.5141280174255372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,64,128,1,float16,float16,0,0.3425040006637573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,8,128,1,fp8,fp8,0,0.4850207805633545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,64,128,1,float16,fp8,0,0.28731839656829833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,64,128,1,fp8,fp8,0,0.2961904048919678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,64,1,128,1,fp8,fp8,0,0.9520336151123047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,1,128,1,float16,float16,0,0.27808640003204343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,1,128,1,float16,fp8,0,0.24389760494232177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,1,128,1,fp8,fp8,0,0.24419839382171632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,2,128,1,float16,float16,0,0.2827775955200195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,2,128,1,float16,fp8,0,0.24386239051818848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,2,128,1,fp8,fp8,0,0.24565439224243163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,4,128,1,float16,fp8,0,0.2446943998336792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,4,128,1,fp8,fp8,0,0.2478111982345581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,8,128,1,float16,float16,0,0.28429760932922366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,8,128,1,float16,fp8,0,0.2468480110168457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,8,128,1,fp8,fp8,0,0.24864959716796875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,64,128,1,float16,float16,0,0.17892160415649414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,64,128,1,float16,fp8,0,0.15016640424728395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,64,128,1,fp8,fp8,0,0.15132479667663573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,1,128,1,float16,float16,0,0.14459840059280396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,1,128,1,float16,fp8,0,0.1266767978668213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,1,128,1,fp8,fp8,0,0.12689599990844727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,2,128,1,float16,float16,0,0.14476319551467895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,2,128,1,float16,fp8,0,0.12730560302734376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,2,128,1,fp8,fp8,0,0.1260975956916809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,4,128,1,float16,float16,0,0.14588160514831544
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,4,128,1,fp8,fp8,0,0.12634559869766235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,8,128,1,float16,float16,0,0.1479215979576111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,8,128,1,float16,fp8,0,0.12742559909820556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,1,128,1,float16,fp8,0,0.47867679595947266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,8,128,1,fp8,fp8,0,0.12882239818573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,2,128,1,float16,float16,0,0.5456575870513916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,64,4,128,1,fp8,fp8,0,0.491102409362793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,64,4,128,1,float16,float16,0,0.28049120903015134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,64,4,128,1,float16,fp8,0,0.1275712013244629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,1,128,1,float16,fp8,0,4.616113662719727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,1,128,1,float16,float16,0,5.610097503662109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,2,128,1,float16,float16,0,5.634873580932617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,2,128,1,fp8,fp8,0,4.604908752441406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,4,128,1,float16,fp8,0,4.613321685791016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,1,128,1,fp8,fp8,0,4.623710250854492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,4,128,1,float16,float16,0,5.52891845703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,2,128,1,float16,fp8,0,4.639364624023438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,4,128,1,fp8,fp8,0,4.607134246826172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,8,128,1,float16,fp8,0,4.744057464599609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,8,128,1,float16,float16,0,5.831849670410156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,64,128,1,float16,fp8,0,2.7800624847412108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,64,8,128,1,fp8,fp8,0,4.638526535034179
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,1,128,1,float16,float16,0,2.711840057373047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,64,128,1,float16,float16,0,3.4745983123779296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,1,128,1,float16,fp8,0,2.319424057006836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,1,128,1,fp8,fp8,0,2.3102367401123045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,2,128,1,float16,fp8,0,2.3000831604003906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,2,128,1,fp8,fp8,0,2.302102470397949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,2,128,1,float16,float16,0,2.7752639770507814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,4,128,1,fp8,fp8,0,2.3239887237548826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,4,128,1,float16,fp8,0,2.3618400573730467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,4,128,1,float16,float16,0,2.8199951171875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,64,128,1,fp8,fp8,0,2.8517152786254885
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,8,128,1,float16,float16,0,2.7756975173950194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,64,128,1,float16,float16,0,1.7317728042602538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,64,128,1,float16,fp8,0,1.414192008972168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,64,128,1,fp8,fp8,0,1.408518409729004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,8,128,1,float16,fp8,0,2.323431968688965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,1,128,1,float16,float16,0,1.4206864356994628
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,64,8,128,1,fp8,fp8,0,2.3309616088867187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,1,128,1,float16,fp8,0,1.1679936408996583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,1,128,1,fp8,fp8,0,1.267073631286621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,2,128,1,float16,fp8,0,1.1684608459472656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,2,128,1,fp8,fp8,0,1.162492847442627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,2,128,1,float16,float16,0,1.3822959899902343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,4,128,1,float16,fp8,0,1.2220720291137694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,4,128,1,fp8,fp8,0,1.160801601409912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,8,128,1,float16,float16,0,1.3425999641418458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,8,128,1,float16,fp8,0,1.1999903678894044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,64,128,1,float16,float16,0,0.8488224029541016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,64,128,1,fp8,fp8,0,0.7214879989624023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,8,128,1,fp8,fp8,0,1.1798175811767577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,1,128,1,float16,fp8,0,0.6168848037719726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,1,128,1,fp8,fp8,0,0.5884448051452636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,2,128,1,float16,fp8,0,0.5867648124694824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,2,128,1,float16,float16,0,0.7124864101409912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,2,128,1,fp8,fp8,0,0.5843520164489746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,4,128,1,float16,float16,0,0.6850719928741456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,4,128,1,float16,fp8,0,0.5857999801635743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,4,128,1,fp8,fp8,0,0.5843503952026368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,8,128,1,float16,fp8,0,0.5913407802581787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,8,128,1,float16,float16,0,0.7026576042175293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,64,128,1,float16,float16,0,0.4314688205718994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,8,128,1,fp8,fp8,0,0.613643217086792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,64,128,1,fp8,fp8,0,0.3618959903717041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,1,128,1,float16,float16,0,0.3417855978012085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,1,128,1,float16,fp8,0,0.29806880950927733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,1,128,1,fp8,fp8,0,0.3027647972106934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,2,128,1,float16,float16,0,0.33523681163787844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,2,128,1,float16,fp8,0,0.30190880298614503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,64,4,128,1,float16,float16,0,1.3081775665283204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,2,128,1,fp8,fp8,0,0.2979039907455444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,4,128,1,float16,fp8,0,0.2985680103302002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,4,128,1,fp8,fp8,0,0.3020240068435669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,8,128,1,float16,fp8,0,0.301692795753479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,8,128,1,float16,float16,0,0.34125919342041017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,64,128,1,float16,float16,0,0.2235584020614624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,8,128,1,fp8,fp8,0,0.30138239860534666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,64,128,1,float16,fp8,0,0.7405951976776123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,64,1,128,1,float16,float16,0,0.6532671928405762
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,1,128,1,float16,float16,0,0.174127995967865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,1,128,1,float16,fp8,0,0.15381439924240112
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,1,128,1,fp8,fp8,0,0.15328320264816284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,2,128,1,float16,float16,0,0.17260960340499878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,2,128,1,float16,fp8,0,0.15445280075073242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,2,128,1,fp8,fp8,0,0.15406719446182252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,4,128,1,float16,float16,0,0.1755776047706604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,4,128,1,float16,fp8,0,0.1548624038696289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,4,128,1,fp8,fp8,0,0.15404319763183594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,8,128,1,float16,float16,0,0.17904160022735596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,8,128,1,fp8,fp8,0,0.1559775948524475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,64,128,1,float16,float16,0,0.1172111988067627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,64,128,1,float16,fp8,0,0.10060960054397583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,64,128,1,fp8,fp8,0,0.10137920379638672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,1,128,1,float16,float16,0,0.09200639724731445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,1,128,1,float16,fp8,0,0.07965279817581176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,1,128,1,fp8,fp8,0,0.07977439761161804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,2,128,1,float16,float16,0,0.09316959977149963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,2,128,1,float16,fp8,0,0.07997440099716187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,2,128,1,fp8,fp8,0,0.08014879822731018
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,4,128,1,float16,float16,0,0.0935584008693695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,64,128,1,float16,fp8,0,0.36098880767822267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,4,128,1,float16,fp8,0,0.0804032027721405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,4,128,1,fp8,fp8,0,0.08061599731445312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,8,128,1,float16,float16,0,0.09536319971084595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,8,128,1,float16,fp8,0,0.08190879821777344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,64,8,128,1,fp8,fp8,0,0.08180320262908936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,64,4,128,1,float16,float16,0,0.3418879985809326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,64,128,1,float16,fp8,0,0.18824800252914428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,64,128,1,fp8,fp8,0,0.18741120100021363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,64,8,128,1,float16,fp8,0,0.156550395488739
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,1,128,1,float16,fp8,0,4.864020919799804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,1,128,1,float16,float16,0,5.6172943115234375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,2,128,1,fp8,fp8,0,4.8352001190185545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,2,128,1,float16,float16,0,5.620851135253906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,4,128,1,float16,float16,0,5.721913528442383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,1,128,1,fp8,fp8,0,4.845427322387695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,2,128,1,float16,fp8,0,4.854246520996094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,4,128,1,float16,fp8,0,4.841281509399414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,4,128,1,fp8,fp8,0,4.895931243896484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,8,128,1,float16,fp8,0,5.010425567626953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,64,128,1,float16,fp8,0,3.0682687759399414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,64,128,1,float16,float16,0,3.772382354736328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,8,128,1,float16,float16,0,5.834292984008789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,64,8,128,1,fp8,fp8,0,4.923284912109375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,1,128,1,float16,float16,0,2.734502410888672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,64,128,1,fp8,fp8,0,3.0652191162109377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,1,128,1,float16,fp8,0,2.435043144226074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,1,128,1,fp8,fp8,0,2.4523103713989256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,2,128,1,fp8,fp8,0,2.494534492492676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,2,128,1,float16,float16,0,2.657150459289551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,2,128,1,float16,fp8,0,2.574777603149414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,4,128,1,float16,float16,0,2.7039119720458986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,4,128,1,float16,fp8,0,2.481068801879883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,4,128,1,fp8,fp8,0,2.429240036010742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,8,128,1,float16,float16,0,2.8717151641845704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,64,128,1,float16,fp8,0,1.5514207839965821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,64,128,1,float16,float16,0,1.8672000885009765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,8,128,1,float16,fp8,0,2.4910192489624023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,64,128,1,fp8,fp8,0,1.580833625793457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,1,128,1,float16,fp8,0,1.2233823776245116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,1,128,1,float16,float16,0,1.3527008056640626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,64,8,128,1,fp8,fp8,0,2.4874576568603515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,1,128,1,fp8,fp8,0,1.283955192565918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,2,128,1,float16,float16,0,1.3415887832641602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,2,128,1,float16,fp8,0,1.2457872390747071
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,2,128,1,fp8,fp8,0,1.229793643951416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,4,128,1,float16,fp8,0,1.2274975776672363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,4,128,1,float16,float16,0,1.3615455627441406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,4,128,1,fp8,fp8,0,1.2472880363464356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,8,128,1,float16,float16,0,1.3940719604492187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,8,128,1,float16,fp8,0,1.259183979034424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,64,128,1,float16,fp8,0,0.7814335823059082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,64,128,1,float16,float16,0,0.9274335861206054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,64,8,128,1,fp8,fp8,0,1.2492752075195312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,1,128,1,float16,fp8,0,0.6192944049835205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,1,128,1,float16,float16,0,0.6910128116607666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,1,128,1,fp8,fp8,0,0.6425439834594726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,2,128,1,float16,float16,0,0.6766623973846435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,2,128,1,float16,fp8,0,0.6121952056884765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,4,128,1,float16,fp8,0,0.6216832160949707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,2,128,1,fp8,fp8,0,0.618936014175415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,4,128,1,float16,float16,0,0.6871471881866456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,8,128,1,float16,float16,0,0.7092256069183349
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,8,128,1,float16,fp8,0,0.6257919788360595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,64,128,1,float16,fp8,0,0.39841599464416505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,64,128,1,float16,float16,0,0.47255678176879884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,64,128,1,fp8,fp8,0,0.39838080406188964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,8,128,1,fp8,fp8,0,0.623628807067871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,1,128,1,float16,float16,0,0.34518239498138426
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,1,128,1,float16,fp8,0,0.3135008096694946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,1,128,1,fp8,fp8,0,0.31491520404815676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,2,128,1,fp8,fp8,0,0.31112160682678225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,2,128,1,float16,float16,0,0.34535679817199705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,4,128,1,fp8,fp8,0,0.3163935899734497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,8,128,1,float16,fp8,0,0.3206016063690186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,8,128,1,fp8,fp8,0,0.31995038986206054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,64,128,1,float16,float16,0,0.2436288118362427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,64,128,1,float16,fp8,0,0.20503199100494385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,64,128,1,fp8,fp8,0,0.20517280101776122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,64,128,1,fp8,fp8,0,0.7803503990173339
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,1,128,1,float16,float16,0,0.17781599760055541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,1,128,1,float16,fp8,0,0.16220959424972534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,1,128,1,fp8,fp8,0,0.16108800172805787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,2,128,1,float16,fp8,0,0.16274720430374146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,64,4,128,1,fp8,fp8,0,0.6289519786834716
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,2,128,1,fp8,fp8,0,0.16311520338058472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,4,128,1,float16,float16,0,0.18069920539855958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,4,128,1,float16,fp8,0,0.1639888048171997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,4,128,1,fp8,fp8,0,0.16390399932861327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,8,128,1,float16,float16,0,0.18522239923477174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,8,128,1,float16,fp8,0,0.1658735990524292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,8,128,1,fp8,fp8,0,0.16530879735946655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,2,128,1,float16,fp8,0,0.315067195892334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,4,128,1,float16,fp8,0,0.31497280597686766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,64,128,1,float16,float16,0,0.12948640584945678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,64,128,1,float16,fp8,0,0.10930720567703248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,64,128,1,fp8,fp8,0,0.1091264009475708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,1,128,1,float16,fp8,0,0.08606240153312683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,1,128,1,fp8,fp8,0,0.0860751986503601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,2,128,1,float16,fp8,0,0.0867247998714447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,8,128,1,float16,float16,0,0.35431680679321287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,2,128,1,fp8,fp8,0,0.08619199991226197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,4,128,1,float16,float16,0,0.09651200175285339
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,4,128,1,float16,fp8,0,0.08633279800415039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,4,128,1,fp8,fp8,0,0.08694239854812622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,8,128,1,float16,float16,0,0.09840800166130066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,8,128,1,float16,fp8,0,0.08711519837379456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,64,128,1,float16,float16,0,0.0698095977306366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,64,128,1,float16,fp8,0,0.058499199151992795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,1,128,1,float16,float16,0,0.04841279983520508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,1,128,1,float16,fp8,0,0.045291200280189514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,1,128,1,fp8,fp8,0,0.04531359970569611
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,64,2,128,1,float16,float16,0,0.17867039442062377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,2,128,1,float16,float16,0,0.04882720112800598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,2,128,1,float16,fp8,0,0.045998400449752806
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,2,128,1,fp8,fp8,0,0.04532000124454498
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,4,128,1,float16,float16,0,0.050160002708435056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,4,128,1,float16,fp8,0,0.04518559873104096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,4,128,1,fp8,fp8,0,0.046059200167655946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,8,128,1,float16,fp8,0,0.04633600115776062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,8,128,1,fp8,fp8,0,0.04603840112686157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,64,4,128,1,float16,float16,0,0.34840960502624513
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,1,128,1,float16,float16,0,0.09456800222396851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,2,128,1,float16,float16,0,0.09521120190620422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,64,8,128,1,fp8,fp8,0,0.08800960183143616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,64,128,1,fp8,fp8,0,0.058627200126647946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,64,8,128,1,float16,float16,0,0.050900799036026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,1,128,1,float16,float16,0,3.779774475097656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,1,128,1,float16,fp8,0,3.640639877319336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,1,128,1,fp8,fp8,0,3.581710433959961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,2,128,1,float16,float16,0,3.7875087738037108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,2,128,1,float16,fp8,0,3.59990234375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,2,128,1,fp8,fp8,0,3.599723052978516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,4,128,1,float16,float16,0,3.862905502319336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,4,128,1,float16,fp8,0,3.6662654876708984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,4,128,1,fp8,fp8,0,3.6193328857421876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,8,128,1,float16,fp8,0,3.68250732421875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,64,128,1,float16,float16,0,2.8857599258422852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,8,128,1,float16,float16,0,3.9606273651123045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,64,128,1,float16,fp8,0,2.4762079238891603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,64,8,128,1,fp8,fp8,0,3.739801788330078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,1,128,1,float16,fp8,0,1.8051759719848632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,64,128,1,fp8,fp8,0,2.489806365966797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,1,128,1,fp8,fp8,0,1.7984832763671874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,2,128,1,float16,fp8,0,1.8263408660888671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,2,128,1,fp8,fp8,0,1.8043855667114257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,4,128,1,float16,float16,0,1.9314895629882813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,4,128,1,float16,fp8,0,1.8146879196166992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,4,128,1,fp8,fp8,0,1.8355712890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,8,128,1,float16,float16,0,1.977342414855957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,8,128,1,float16,fp8,0,1.8715200424194336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,1,128,1,float16,float16,0,1.8868528366088868
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,64,128,1,fp8,fp8,0,1.2541888236999512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,1,128,1,float16,float16,0,0.9633695602416992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,1,128,1,float16,fp8,0,0.9165743827819824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,1,128,1,fp8,fp8,0,0.915494441986084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,2,128,1,float16,float16,0,1.9056304931640624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,2,128,1,float16,float16,0,0.954644775390625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,2,128,1,float16,fp8,0,0.9174544334411621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,2,128,1,fp8,fp8,0,0.9153583526611329
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,4,128,1,float16,float16,0,0.9718976020812988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,4,128,1,float16,fp8,0,0.9218640327453613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,4,128,1,fp8,fp8,0,0.9229167938232422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,8,128,1,float16,float16,0,1.0003168106079101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,64,128,1,float16,float16,0,1.4571904182434081
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,64,128,1,float16,float16,0,0.7323247909545898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,64,8,128,1,fp8,fp8,0,1.857931137084961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,64,128,1,float16,fp8,0,0.6329599857330322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,64,128,1,fp8,fp8,0,0.6307951927185058
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,1,128,1,float16,float16,0,0.47974557876586915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,1,128,1,fp8,fp8,0,0.4596992015838623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,2,128,1,float16,float16,0,0.4827136039733887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,2,128,1,float16,fp8,0,0.46117281913757324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,2,128,1,fp8,fp8,0,0.46219840049743655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,4,128,1,float16,float16,0,0.4879648208618164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,4,128,1,float16,fp8,0,0.4668464183807373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,4,128,1,fp8,fp8,0,0.4655776023864746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,8,128,1,float16,float16,0,0.50698881149292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,8,128,1,float16,fp8,0,0.4732800006866455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,8,128,1,fp8,fp8,0,0.4729951858520508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,64,128,1,float16,float16,0,0.3740319967269897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,64,128,1,float16,fp8,0,0.32240478992462157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,1,128,1,fp8,fp8,0,0.23612959384918214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,1,128,1,float16,fp8,0,0.23551199436187745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,2,128,1,float16,float16,0,0.24622719287872313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,8,128,1,fp8,fp8,0,0.9458271980285644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,2,128,1,float16,fp8,0,0.23586080074310303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,64,128,1,float16,fp8,0,1.244643211364746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,2,128,1,fp8,fp8,0,0.23783519268035888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,4,128,1,float16,float16,0,0.25073919296264646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,4,128,1,float16,fp8,0,0.23467040061950684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,4,128,1,fp8,fp8,0,0.23997759819030762
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,64,1,128,1,float16,fp8,0,0.4625840187072754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,8,128,1,float16,float16,0,0.2591007947921753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,8,128,1,float16,fp8,0,0.23990719318389891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,8,128,1,fp8,fp8,0,0.24506878852844238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,64,128,1,float16,fp8,0,0.16731679439544678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,64,128,1,float16,float16,0,0.1934432029724121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,1,128,1,float16,float16,0,0.12949440479278565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,1,128,1,float16,fp8,0,0.1224128007888794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,1,128,1,fp8,fp8,0,0.12328159809112549
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,2,128,1,float16,float16,0,0.1306015968322754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,2,128,1,float16,fp8,0,0.12319359779357911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,2,128,1,fp8,fp8,0,0.12343039512634277
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,4,128,1,float16,float16,0,0.13262879848480225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,4,128,1,float16,fp8,0,0.12415200471878052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,4,128,1,fp8,fp8,0,0.12431679964065552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,8,128,1,float16,float16,0,0.13761279582977295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,8,128,1,float16,fp8,0,0.1262943983078003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,64,128,1,float16,float16,0,0.10491679906845093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,8,128,1,fp8,fp8,0,0.12687840461730956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,64,128,1,fp8,fp8,0,0.08998079895973206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,1,128,1,float16,float16,0,0.06971039772033691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,1,128,1,float16,float16,0,0.24447519779205323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,1,128,1,fp8,fp8,0,0.06643360257148742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,1,128,1,float16,fp8,0,0.06710399985313416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,2,128,1,float16,float16,0,0.06955360174179077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,2,128,1,fp8,fp8,0,0.06678879857063294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,4,128,1,float16,float16,0,0.07105759978294372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,4,128,1,float16,fp8,0,0.06743360161781312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,4,128,1,fp8,fp8,0,0.06694560050964356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,8,128,1,float16,float16,0,0.0729856014251709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,8,128,1,float16,fp8,0,0.06823199987411499
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,8,128,1,fp8,fp8,0,0.06849920153617858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,64,128,1,float16,float16,0,0.05746399760246277
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,64,128,1,float16,fp8,0,0.050761598348617556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,64,8,128,1,float16,fp8,0,0.9424927711486817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,1,128,1,float16,float16,0,0.03609600067138672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,1,128,1,fp8,fp8,0,0.03542239964008331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,1,128,1,float16,fp8,0,0.03531520068645477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,2,128,1,float16,float16,0,0.03639520108699799
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,2,128,1,float16,fp8,0,0.03549599945545197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,2,128,1,fp8,fp8,0,0.03556160032749176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,4,128,1,float16,float16,0,0.036743998527526855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,4,128,1,float16,fp8,0,0.03548319935798645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,4,128,1,fp8,fp8,0,0.035688000917434695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,8,128,1,float16,float16,0,0.03879519999027252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,8,128,1,float16,fp8,0,0.035953599214553836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,8,128,1,fp8,fp8,0,0.035812801122665404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,64,128,1,float16,float16,0,0.03465920090675354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,64,128,1,float16,fp8,0,0.028854399919509888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,64,128,1,fp8,fp8,0,0.028723201155662535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,1,128,1,float16,float16,0,0.021771200001239777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,64,64,128,1,fp8,fp8,0,0.16579359769821167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,1,128,1,fp8,fp8,0,0.022383999824523926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,2,128,1,float16,float16,0,0.021817600727081297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,2,128,1,float16,fp8,0,0.02235199958086014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,2,128,1,fp8,fp8,0,0.022145600616931917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,4,128,1,float16,float16,0,0.022091199457645417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,4,128,1,float16,fp8,0,0.022431999444961548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,4,128,1,fp8,fp8,0,0.022313599288463593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,8,128,1,float16,float16,0,0.022342400252819063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,8,128,1,float16,fp8,0,0.022417600452899932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,8,128,1,fp8,fp8,0,0.022409600019454957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,64,128,1,float16,fp8,0,0.08979359865188599
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,64,64,128,1,fp8,fp8,0,0.32138240337371826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,64,2,128,1,float16,fp8,0,0.06692479848861695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,1,128,1,float16,float16,0,1.4899824142456055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,1,128,1,float16,fp8,0,1.4970432281494142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,64,64,128,1,fp8,fp8,0,0.050355201959609984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,1,128,1,fp8,fp8,0,1.4996224403381349
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,2,128,1,float16,float16,0,1.4891183853149415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,2,128,1,float16,fp8,0,1.5045568466186523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,64,1,128,1,float16,fp8,0,0.02224159985780716
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,4,128,1,float16,float16,0,1.5093968391418457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,2,128,1,fp8,fp8,0,1.514246368408203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,4,128,1,float16,fp8,0,1.5201807975769044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,4,128,1,fp8,fp8,0,1.5151616096496583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,8,128,1,float16,float16,0,1.5779376029968262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,8,128,1,float16,fp8,0,1.5505552291870117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,64,128,1,float16,float16,0,1.2696623802185059
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,1,128,1,float16,float16,0,0.7473951816558838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,64,8,128,1,fp8,fp8,0,1.549931240081787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,1,128,1,float16,fp8,0,0.7577167987823487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,64,128,1,float16,fp8,0,1.0472384452819825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,64,128,1,fp8,fp8,0,1.0584815979003905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,1,128,1,fp8,fp8,0,0.7623680114746094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,2,128,1,float16,float16,0,0.750705623626709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,2,128,1,fp8,fp8,0,0.7581408023834229
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,2,128,1,float16,fp8,0,0.7576511859893799
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,4,128,1,float16,fp8,0,0.7646592140197754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,4,128,1,fp8,fp8,0,0.7588160037994385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,4,128,1,float16,float16,0,0.767139196395874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,8,128,1,float16,float16,0,0.8056672096252442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,8,128,1,float16,fp8,0,0.7856383800506592
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,1,128,1,float16,float16,0,0.3773967981338501
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,64,128,1,float16,fp8,0,0.5336128234863281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,64,128,1,fp8,fp8,0,0.5336143970489502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,1,128,1,float16,fp8,0,0.38738720417022704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,1,128,1,fp8,fp8,0,0.38701438903808594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,2,128,1,float16,float16,0,0.37903358936309817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,2,128,1,float16,fp8,0,0.3845407962799072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,2,128,1,fp8,fp8,0,0.3848191976547241
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,4,128,1,float16,fp8,0,0.3916304111480713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,4,128,1,fp8,fp8,0,0.3856719970703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,8,128,1,float16,float16,0,0.4066751956939697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,8,128,1,fp8,fp8,0,0.3996815919876099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,64,128,1,float16,float16,0,0.32854719161987306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,64,128,1,float16,fp8,0,0.2737663984298706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,1,128,1,float16,float16,0,0.1950752019882202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,64,128,1,fp8,fp8,0,0.2755552053451538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,1,128,1,float16,fp8,0,0.197707200050354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,1,128,1,fp8,fp8,0,0.19624320268630982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,2,128,1,float16,float16,0,0.19852800369262696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,2,128,1,float16,fp8,0,0.19736479520797728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,2,128,1,fp8,fp8,0,0.19589439630508423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,4,128,1,float16,float16,0,0.20327360630035402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,4,128,1,float16,fp8,0,0.19929759502410888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,4,128,1,fp8,fp8,0,0.1979439973831177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,8,128,1,float16,float16,0,0.21181919574737548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,8,128,1,float16,fp8,0,0.20378880500793456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,64,8,128,1,fp8,fp8,0,0.20269439220428467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,64,128,1,float16,float16,0,0.1721984028816223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,64,128,1,float16,fp8,0,0.14333280324935913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,64,128,1,fp8,fp8,0,0.14286719560623168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,1,128,1,float16,fp8,0,0.10392800569534302
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,1,128,1,fp8,fp8,0,0.10298080444335937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,2,128,1,float16,float16,0,0.10478399991989136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,2,128,1,float16,fp8,0,0.10320960283279419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,64,8,128,1,fp8,fp8,0,0.781540822982788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,64,128,1,float16,float16,0,0.6437407970428467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,2,128,1,fp8,fp8,0,0.10421760082244873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,4,128,1,float16,float16,0,0.10623680353164673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,4,128,1,float16,fp8,0,0.1055791974067688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,4,128,1,fp8,fp8,0,0.10547360181808471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,8,128,1,float16,fp8,0,0.10640959739685059
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,8,128,1,fp8,fp8,0,0.10733439922332763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,64,128,1,float16,fp8,0,0.0777616024017334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,64,128,1,fp8,fp8,0,0.07871999740600585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,1,128,1,float16,float16,0,0.05773599743843079
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,4,128,1,float16,float16,0,0.3924560070037842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,1,128,1,float16,fp8,0,0.05742400288581848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,1,128,1,fp8,fp8,0,0.056435197591781616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,2,128,1,float16,fp8,0,0.05760319828987122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,2,128,1,float16,float16,0,0.05912320017814636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,2,128,1,fp8,fp8,0,0.056729602813720706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,64,8,128,1,float16,fp8,0,0.39663360118865965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,4,128,1,float16,float16,0,0.05912960171699524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,4,128,1,float16,fp8,0,0.058329600095748904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,4,128,1,fp8,fp8,0,0.05739679932594299
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,8,128,1,float16,float16,0,0.06091359853744507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,8,128,1,float16,fp8,0,0.0589792013168335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,64,128,1,float16,float16,0,0.05211520195007324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,64,128,1,float16,fp8,0,0.04483680129051208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,64,128,1,fp8,fp8,0,0.044593599438667295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,1,128,1,float16,fp8,0,0.030980798602104186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,1,128,1,fp8,fp8,0,0.030862399935722352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,2,128,1,float16,float16,0,0.030604800581932066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,2,128,1,fp8,fp8,0,0.030972799658775328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,4,128,1,float16,float16,0,0.03129119873046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,4,128,1,float16,fp8,0,0.030883198976516722
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,4,128,1,fp8,fp8,0,0.030907198786735535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,8,128,1,float16,float16,0,0.03271040022373199
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,8,128,1,float16,fp8,0,0.03118399977684021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,8,128,1,fp8,fp8,0,0.031376001238822934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,64,128,1,float16,float16,0,0.03103039860725403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,64,128,1,float16,fp8,0,0.024271999299526215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,64,128,1,fp8,fp8,0,0.02422720044851303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,1,128,1,float16,float16,0,0.10504319667816162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,1,128,1,float16,float16,0,0.018961599469184874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,1,128,1,fp8,fp8,0,0.019305600225925444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,2,128,1,float16,float16,0,0.018849599361419677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,2,128,1,float16,fp8,0,0.019331200420856474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,2,128,1,fp8,fp8,0,0.019230400025844575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,4,128,1,float16,float16,0,0.01887200027704239
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,64,128,1,float16,float16,0,0.09374079704284669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,4,128,1,float16,fp8,0,0.019254399836063384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,4,128,1,fp8,fp8,0,0.0192671999335289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,8,128,1,fp8,fp8,0,0.019124799966812135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,8,128,1,float16,fp8,0,0.019230400025844575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,64,128,1,float16,float16,0,0.016369600594043732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,64,128,1,float16,fp8,0,0.015928000211715698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,64,128,1,fp8,fp8,0,0.01563519984483719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,1,128,1,float16,float16,0,0.014076800644397735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,1,128,1,float16,fp8,0,0.013967999815940857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,1,128,1,float16,float16,0,0.03017280101776123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,2,128,1,float16,float16,0,0.013766400516033173
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,2,128,1,float16,fp8,0,0.014180800318717957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,2,128,1,fp8,fp8,0,0.014131200313568116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,4,128,1,float16,float16,0,0.014115199446678162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,4,128,1,float16,fp8,0,0.014176000654697419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,64,2,128,1,float16,fp8,0,0.030878400802612303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,4,128,1,fp8,fp8,0,0.014115199446678162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,8,128,1,float16,float16,0,0.01417119950056076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,8,128,1,float16,fp8,0,0.014268800616264343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,8,128,1,fp8,fp8,0,0.014363199472427368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,1,128,1,float16,float16,0,0.6814511775970459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,1,128,1,float16,fp8,0,0.720088005065918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,1,128,1,float16,fp8,0,0.01931679993867874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,1,128,1,fp8,fp8,0,0.7191760063171386
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,2,128,1,float16,float16,0,0.6829552173614502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,64,8,128,1,float16,float16,0,0.11117279529571533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,2,128,1,float16,fp8,0,0.7145904064178467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,2,128,1,fp8,fp8,0,0.7182960033416748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,4,128,1,float16,float16,0,0.7016608238220214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,4,128,1,float16,fp8,0,0.7276559829711914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,64,8,128,1,fp8,fp8,0,0.059543997049331665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,64,1,128,1,fp8,fp8,0,0.014291200041770934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,4,128,1,fp8,fp8,0,0.7245247840881348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,8,128,1,float16,float16,0,0.7299456119537353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,8,128,1,float16,fp8,0,0.7544000148773193
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,64,8,128,1,fp8,fp8,0,0.7517072200775147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,64,128,1,float16,float16,0,0.6096432209014893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,1,128,1,float16,float16,0,0.34825279712677004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,1,128,1,float16,fp8,0,0.3654207944869995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,1,128,1,fp8,fp8,0,0.36375041007995607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,2,128,1,float16,float16,0,0.3489840030670166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,2,128,1,fp8,fp8,0,0.36360960006713866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,4,128,1,float16,float16,0,0.3566751956939697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,4,128,1,float16,fp8,0,0.3697648048400879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,4,128,1,fp8,fp8,0,0.3683727979660034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,8,128,1,float16,float16,0,0.3706367969512939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,8,128,1,float16,fp8,0,0.38319199085235595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,8,128,1,fp8,fp8,0,0.3808799982070923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,64,128,1,float16,float16,0,0.3121232032775879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,64,128,1,float16,fp8,0,0.2752543926239014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,64,128,1,fp8,fp8,0,0.27496800422668455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,1,128,1,float16,float16,0,0.17868319749832154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,1,128,1,float16,fp8,0,0.1870255947113037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,1,128,1,fp8,fp8,0,0.18742400407791138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,2,128,1,float16,float16,0,0.17915040254592896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,2,128,1,fp8,fp8,0,0.18850400447845458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,2,128,1,float16,fp8,0,0.1852895975112915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,4,128,1,float16,float16,0,0.18449440002441406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,4,128,1,float16,fp8,0,0.18881920576095582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,4,128,1,fp8,fp8,0,0.19037439823150634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,8,128,1,float16,float16,0,0.19054239988327026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,8,128,1,float16,fp8,0,0.19625600576400756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,64,8,128,1,fp8,fp8,0,0.19562239646911622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,64,128,1,float16,fp8,0,0.14120639562606813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,64,128,1,float16,fp8,0,0.5339695930480957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,64,128,1,float16,float16,0,0.16376800537109376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,1,128,1,float16,float16,0,0.09561920166015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,64,128,1,fp8,fp8,0,0.5324912071228027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,1,128,1,float16,fp8,0,0.09977759718894959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,2,128,1,float16,fp8,0,0.10043679475784302
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,1,128,1,fp8,fp8,0,0.10029439926147461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,64,2,128,1,float16,fp8,0,0.3645328044891357
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,4,128,1,float16,float16,0,0.0963919997215271
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,4,128,1,fp8,fp8,0,0.1013375997543335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,8,128,1,float16,fp8,0,0.10257120132446289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,8,128,1,fp8,fp8,0,0.10422559976577758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,64,128,1,float16,float16,0,0.08961120247840881
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,64,128,1,float16,fp8,0,0.07505279779434204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,64,128,1,fp8,fp8,0,0.07497119903564453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,1,128,1,float16,float16,0,0.053883200883865355
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,1,128,1,float16,fp8,0,0.054071998596191405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,1,128,1,fp8,fp8,0,0.0543008029460907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,2,128,1,float16,float16,0,0.054343998432159424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,2,128,1,float16,fp8,0,0.0547327995300293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,2,128,1,fp8,fp8,0,0.05453439950942993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,64,8,128,1,float16,float16,0,0.01943040043115616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,4,128,1,float16,float16,0,0.0548799991607666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,4,128,1,float16,fp8,0,0.054955202341079715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,4,128,1,fp8,fp8,0,0.055060797929763795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,8,128,1,float16,fp8,0,0.056411200761795045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,8,128,1,fp8,fp8,0,0.05611519813537598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,64,128,1,float16,float16,0,0.049907198548316954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,64,128,1,float16,fp8,0,0.04012320041656494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,64,128,1,fp8,fp8,0,0.039878401160240176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,1,128,1,float16,float16,0,0.026927998661994933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,1,128,1,float16,fp8,0,0.02778559923171997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,1,128,1,fp8,fp8,0,0.028030401468276976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,2,128,1,float16,float16,0,0.027107200026512145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,2,128,1,float16,fp8,0,0.0279664009809494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,2,128,1,fp8,fp8,0,0.027769601345062254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,4,128,1,float16,float16,0,0.027907198667526244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,4,128,1,float16,fp8,0,0.027827200293540955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,4,128,1,fp8,fp8,0,0.028126400709152222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,8,128,1,float16,float16,0,0.02980799973011017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,8,128,1,float16,fp8,0,0.028428798913955687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,64,8,128,1,fp8,fp8,0,0.02823199927806854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,64,128,1,float16,fp8,0,0.020203199982643128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,64,128,1,fp8,fp8,0,0.09198560118675232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,64,128,1,fp8,fp8,0,0.1421344041824341
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,1,128,1,float16,fp8,0,0.017905600368976593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,2,128,1,float16,float16,0,0.09569119811058044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,2,128,1,fp8,fp8,0,0.10037280321121216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,2,128,1,float16,float16,0,0.017535999417304993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,2,128,1,float16,fp8,0,0.017979200184345245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,2,128,1,fp8,fp8,0,0.018092800676822663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,4,128,1,float16,fp8,0,0.10091520547866821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,4,128,1,float16,float16,0,0.017636799812316896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,4,128,1,float16,fp8,0,0.01799200028181076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,4,128,1,fp8,fp8,0,0.01793760061264038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,8,128,1,float16,float16,0,0.018012799322605133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,8,128,1,fp8,fp8,0,0.018083199858665466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,64,128,1,float16,float16,0,0.014921599626541137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,64,128,1,float16,fp8,0,0.014001600444316864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,64,128,1,fp8,fp8,0,0.013995200395584106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,1,128,1,float16,float16,0,0.012484800070524216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,1,128,1,float16,fp8,0,0.012982399761676788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,1,128,1,fp8,fp8,0,0.013007999956607818
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,2,128,1,float16,float16,0,0.012515200674533844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,64,8,128,1,float16,float16,0,0.05793439745903015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,2,128,1,fp8,fp8,0,0.012848000228404998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,4,128,1,float16,float16,0,0.012464000284671784
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,4,128,1,float16,fp8,0,0.01268800050020218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,4,128,1,fp8,fp8,0,0.01286720037460327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,8,128,1,float16,float16,0,0.012860800325870513
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,8,128,1,float16,fp8,0,0.013027200102806091
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,8,128,1,fp8,fp8,0,0.012972800433635712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,64,128,1,float16,float16,0,0.01430719941854477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,64,128,1,float16,fp8,0,0.013238400220870972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,64,128,1,fp8,fp8,0,0.013289600610733032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,1,128,1,float16,float16,0,0.012137600034475327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,1,128,1,float16,fp8,0,0.012481600046157837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,1,128,1,float16,float16,0,0.017209599912166595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,1,128,1,fp8,fp8,0,0.012465599924325943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,2,128,1,float16,float16,0,0.012043199688196182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,2,128,1,float16,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,2,128,1,fp8,fp8,0,0.012432000041007996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,1,128,1,fp8,fp8,0,0.01780800074338913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,4,128,1,float16,fp8,0,0.012579199671745301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,8,128,1,float16,float16,0,0.012467200309038163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,64,8,128,1,float16,float16,0,0.10155839920043945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,8,128,1,float16,fp8,0,0.018078400194644927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,1,128,1,float16,float16,0,0.34968640804290774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,64,2,128,1,float16,fp8,0,0.012892800569534301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,1,128,1,float16,fp8,0,0.3640431880950928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,1,128,1,fp8,fp8,0,0.36493918895721433
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,2,128,1,float16,float16,0,0.35029120445251466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,2,128,1,float16,fp8,0,0.36436479091644286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,2,128,1,fp8,fp8,0,0.3637631893157959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,64,64,128,1,float16,float16,0,0.028880000114440918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,4,128,1,float16,float16,0,0.35924639701843264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,4,128,1,float16,fp8,0,0.36800479888916016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,4,128,1,fp8,fp8,0,0.36734719276428224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,4,128,1,float16,float16,0,0.012280000001192093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,8,128,1,float16,float16,0,0.37389440536499025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,8,128,1,float16,fp8,0,0.3823247909545898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,4,128,1,fp8,fp8,0,0.012559999525547028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,8,128,1,float16,fp8,0,0.012804800271987915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,64,128,1,float16,float16,0,0.3644095897674561
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,64,128,1,float16,fp8,0,0.2949455976486206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,64,8,128,1,fp8,fp8,0,0.012692800164222718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,1,128,1,float16,float16,0,0.18038879632949828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,64,128,1,fp8,fp8,0,0.2930095911026001
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,2,128,1,float16,float16,0,0.18169599771499634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,1,128,1,fp8,fp8,0,0.18649120330810548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,2,128,1,float16,fp8,0,0.1868880033493042
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,4,128,1,float16,float16,0,0.18381279706954956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,4,128,1,float16,fp8,0,0.1893615961074829
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,4,128,1,fp8,fp8,0,0.18969600200653075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,8,128,1,float16,float16,0,0.19343359470367433
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,8,128,1,float16,fp8,0,0.19441920518875122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,8,128,1,fp8,fp8,0,0.19572479724884034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,64,128,1,float16,float16,0,0.18894239664077758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,64,128,1,float16,fp8,0,0.15147360563278198
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,64,128,1,fp8,fp8,0,0.15267679691314698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,1,128,1,float16,float16,0,0.09466080069541931
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,1,128,1,float16,fp8,0,0.10066080093383789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,2,128,1,float16,float16,0,0.09577280282974243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,2,128,1,float16,fp8,0,0.1005295991897583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,4,128,1,float16,float16,0,0.09828959703445435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,4,128,1,float16,fp8,0,0.10161279439926148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,4,128,1,fp8,fp8,0,0.10165920257568359
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,8,128,1,float16,float16,0,0.10238879919052124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,8,128,1,float16,fp8,0,0.10400639772415161
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,8,128,1,fp8,fp8,0,0.10447520017623901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,64,128,1,float16,fp8,0,0.08275520205497741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,64,128,1,fp8,fp8,0,0.08239200115203857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,1,128,1,float16,float16,0,0.055318397283554074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,1,128,1,float16,fp8,0,0.054995197057724
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,64,8,128,1,fp8,fp8,0,0.3787983894348145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,1,128,1,fp8,fp8,0,0.0548799991607666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,2,128,1,fp8,fp8,0,0.05511519908905029
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,2,128,1,float16,fp8,0,0.055067199468612674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,1,128,1,float16,fp8,0,0.18612799644470215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,4,128,1,float16,float16,0,0.055851197242736815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,4,128,1,fp8,fp8,0,0.05563520193099976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,8,128,1,float16,float16,0,0.05778719782829285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,64,2,128,1,fp8,fp8,0,0.1877568006515503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,8,128,1,fp8,fp8,0,0.05697280168533325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,64,128,1,float16,float16,0,0.05447520017623901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,64,128,1,float16,fp8,0,0.04236479997634888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,64,128,1,fp8,fp8,0,0.041756799817085265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,1,128,1,float16,float16,0,0.026907199621200563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,1,128,1,float16,fp8,0,0.027739199995994567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,1,128,1,fp8,fp8,0,0.02789439857006073
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,2,128,1,float16,float16,0,0.026976001262664796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,2,128,1,float16,fp8,0,0.027931201457977294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,1,128,1,fp8,fp8,0,0.10025759935379028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,2,128,1,fp8,fp8,0,0.02770400047302246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,4,128,1,float16,fp8,0,0.02770560085773468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,4,128,1,fp8,fp8,0,0.027907198667526244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,8,128,1,float16,float16,0,0.02895680069923401
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,8,128,1,float16,fp8,0,0.028062400221824647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,8,128,1,fp8,fp8,0,0.028044798970222475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,64,128,1,float16,float16,0,0.030220800638198854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,64,128,1,float16,fp8,0,0.02104160040616989
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,64,128,1,fp8,fp8,0,0.02093279957771301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,1,128,1,float16,float16,0,0.01730239987373352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,1,128,1,float16,fp8,0,0.017836800217628478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,1,128,1,fp8,fp8,0,0.01770720034837723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,2,128,1,float16,float16,0,0.01719679981470108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,2,128,1,float16,float16,0,0.054992002248764035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,2,128,1,float16,fp8,0,0.017774400115013123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,2,128,1,fp8,fp8,0,0.01794400066137314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,4,128,1,float16,float16,0,0.01730400025844574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,4,128,1,float16,fp8,0,0.01796800047159195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,4,128,1,float16,fp8,0,0.05553920269012451
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,8,128,1,float16,float16,0,0.017788800597190856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,8,128,1,float16,fp8,0,0.0181551992893219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,8,128,1,float16,fp8,0,0.05664799809455871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,8,128,1,fp8,fp8,0,0.01818079948425293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,64,128,1,float16,float16,0,0.016174399852752687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,64,128,1,float16,fp8,0,0.014425599575042724
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,1,128,1,float16,float16,0,0.012454400211572647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,1,128,1,float16,fp8,0,0.012680000066757202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,1,128,1,fp8,fp8,0,0.012856000661849975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,2,128,1,float16,float16,0,0.01225920021533966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,2,128,1,fp8,fp8,0,0.012796799838542938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,4,128,1,float16,float16,0,0.012430399656295776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,4,128,1,float16,fp8,0,0.012836800515651703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,4,128,1,fp8,fp8,0,0.012868799269199371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,64,4,128,1,float16,float16,0,0.027671998739242552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,8,128,1,float16,float16,0,0.012624000012874604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,64,2,128,1,fp8,fp8,0,0.1009600043296814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,8,128,1,float16,fp8,0,0.013038399815559387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,8,128,1,fp8,fp8,0,0.013259199261665345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,64,128,1,fp8,fp8,0,0.011311999708414077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,64,128,1,float16,float16,0,0.01242400035262108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,1,128,1,float16,float16,0,0.01215519979596138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,1,128,1,fp8,fp8,0,0.012409599870443344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,2,128,1,float16,float16,0,0.012067200243473053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,2,128,1,float16,fp8,0,0.01242239996790886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,2,128,1,fp8,fp8,0,0.012641599774360657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,4,128,1,float16,float16,0,0.012166400253772736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,4,128,1,float16,fp8,0,0.012545600533485413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,4,128,1,fp8,fp8,0,0.012595200538635254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,8,128,1,float16,float16,0,0.01223199963569641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,8,128,1,float16,fp8,0,0.012574400007724761
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,8,128,1,fp8,fp8,0,0.012470400333404541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,64,128,1,float16,float16,0,0.011832000315189361
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,64,4,128,1,fp8,fp8,0,0.01777759939432144
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,64,128,1,float16,fp8,0,0.010843200236558914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,1,128,1,float16,float16,0,0.01210559979081154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,1,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,1,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,64,128,1,fp8,fp8,0,0.014398400485515595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,2,128,1,float16,float16,0,0.012009599804878235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,2,128,1,fp8,fp8,0,0.012238399684429168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,64,2,128,1,float16,fp8,0,0.012756800651550293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,4,128,1,float16,float16,0,0.011952000111341477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,4,128,1,float16,fp8,0,0.012457600235939026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,4,128,1,fp8,fp8,0,0.012417600303888322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,8,128,1,float16,float16,0,0.012041600048542022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,8,128,1,fp8,fp8,0,0.01242239996790886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,8,128,1,float16,fp8,0,0.012617599964141846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,1,128,1,float16,fp8,0,0.1842095971107483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,1,128,1,fp8,fp8,0,0.1875056028366089
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,64,128,1,float16,fp8,0,0.011073599755764007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,2,128,1,float16,float16,0,0.17936960458755494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,2,128,1,float16,fp8,0,0.18711040019989014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,64,1,128,1,float16,fp8,0,0.012654399871826172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,2,128,1,fp8,fp8,0,0.18708159923553466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,4,128,1,float16,float16,0,0.18495839834213257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,64,64,128,1,float16,float16,0,0.10073599815368653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,4,128,1,fp8,fp8,0,0.18751840591430663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,8,128,1,float16,float16,0,0.19254080057144166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,8,128,1,float16,fp8,0,0.1966639995574951
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,8,128,1,fp8,fp8,0,0.19339200258255004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,64,128,1,float16,float16,0,0.2656847953796387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,64,128,1,float16,fp8,0,0.22016639709472657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,1,128,1,float16,float16,0,0.09503679871559143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,1,128,1,float16,fp8,0,0.10105600357055664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,1,128,1,fp8,fp8,0,0.10026079416275024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,64,128,1,fp8,fp8,0,0.221016001701355
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,2,128,1,float16,fp8,0,0.10100159645080567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,2,128,1,fp8,fp8,0,0.09993759989738464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,4,128,1,float16,float16,0,0.09779040217399597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,64,128,1,fp8,fp8,0,0.010760000348091126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,4,128,1,float16,fp8,0,0.10129599571228028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,4,128,1,fp8,fp8,0,0.10044319629669189
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,8,128,1,float16,float16,0,0.10224000215530396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,8,128,1,fp8,fp8,0,0.10327680110931396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,8,128,1,float16,fp8,0,0.10444960594177247
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,64,128,1,float16,float16,0,0.13892799615859985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,64,128,1,float16,fp8,0,0.11615999937057495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,1,128,1,float16,float16,0,0.05555199980735779
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,1,128,1,float16,fp8,0,0.05520480275154114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,1,128,1,fp8,fp8,0,0.05509759783744812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,2,128,1,float16,float16,0,0.055134397745132444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,2,128,1,float16,fp8,0,0.05505440235137939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,2,128,1,fp8,fp8,0,0.05508480072021484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,4,128,1,float16,float16,0,0.05670560002326965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,4,128,1,float16,fp8,0,0.055929601192474365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,4,128,1,fp8,fp8,0,0.055632001161575316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,8,128,1,float16,float16,0,0.058987200260162354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,8,128,1,float16,fp8,0,0.057214397192001346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,1,128,1,float16,float16,0,0.17861759662628174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,8,128,1,fp8,fp8,0,0.057361602783203125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,64,128,1,float16,float16,0,0.07408480048179626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,64,128,1,float16,fp8,0,0.062431997060775755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,64,128,1,fp8,fp8,0,0.0619983971118927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,1,128,1,float16,fp8,0,0.028091201186180116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,2,128,1,float16,float16,0,0.02731359899044037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,2,128,1,float16,fp8,0,0.02794559895992279
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,2,128,1,fp8,fp8,0,0.027923199534416198
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,4,128,1,float16,float16,0,0.027728000283241273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,4,128,1,float16,fp8,0,0.028046399354934692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,4,128,1,fp8,fp8,0,0.02805120050907135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,8,128,1,float16,float16,0,0.029297599196434022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,8,128,1,float16,fp8,0,0.02879520058631897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,8,128,1,fp8,fp8,0,0.02873600125312805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,64,128,1,float16,float16,0,0.04118399918079376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,64,128,1,float16,fp8,0,0.030535998940467834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,64,2,128,1,float16,float16,0,0.09521120190620422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,64,128,1,fp8,fp8,0,0.03030880093574524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,1,128,1,float16,float16,0,0.01704320013523102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,1,128,1,float16,fp8,0,0.017895999550819396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,1,128,1,fp8,fp8,0,0.01769919991493225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,2,128,1,float16,float16,0,0.017155200242996216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,2,128,1,float16,fp8,0,0.017657600343227386
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,2,128,1,fp8,fp8,0,0.01767839938402176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,4,128,1,float16,float16,0,0.017212800681591034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,4,128,1,float16,fp8,0,0.017785599827766417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,4,128,1,fp8,fp8,0,0.017731200158596038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,8,128,1,float16,float16,0,0.01738879978656769
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,8,128,1,float16,fp8,0,0.017975999414920805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,64,8,128,1,fp8,fp8,0,0.017827199399471284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,64,64,128,1,fp8,fp8,0,0.11641440391540528
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,64,2,128,1,float16,fp8,0,0.012520000338554382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,64,128,1,float16,float16,0,0.02128479927778244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,64,128,1,float16,fp8,0,0.018918399512767792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,64,128,1,fp8,fp8,0,0.018972800672054292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,1,128,1,float16,fp8,0,0.012992000579833985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,1,128,1,fp8,fp8,0,0.013006399571895599
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,2,128,1,float16,float16,0,0.012742400169372559
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,2,128,1,fp8,fp8,0,0.012929600477218629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,4,128,1,float16,float16,0,0.012806400656700134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,4,128,1,float16,fp8,0,0.01297439932823181
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,4,128,1,fp8,fp8,0,0.01308799982070923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,8,128,1,float16,float16,0,0.012868799269199371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,8,128,1,float16,fp8,0,0.013166399300098419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,8,128,1,fp8,fp8,0,0.012960000336170197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,64,128,1,float16,float16,0,0.014703999459743499
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,64,128,1,float16,fp8,0,0.01387999951839447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,64,128,1,fp8,fp8,0,0.013571199774742127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,1,128,1,float16,float16,0,0.02717919945716858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,1,128,1,float16,fp8,0,0.012265600264072418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,1,128,1,fp8,fp8,0,0.012399999797344208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,2,128,1,float16,float16,0,0.012030400335788727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,2,128,1,float16,fp8,0,0.01252480000257492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,2,128,1,fp8,fp8,0,0.01252799928188324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,4,128,1,float16,float16,0,0.01207360029220581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,64,4,128,1,float16,fp8,0,0.18917280435562134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,4,128,1,float16,fp8,0,0.012595200538635254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,4,128,1,fp8,fp8,0,0.01255359947681427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,8,128,1,float16,float16,0,0.01234079971909523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,8,128,1,fp8,fp8,0,0.01271039992570877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,64,128,1,float16,fp8,0,0.011126399785280228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,64,128,1,fp8,fp8,0,0.010995200276374817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,1,128,1,float16,float16,0,0.012004800140857697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,1,128,1,float16,fp8,0,0.012323199957609176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,1,128,1,fp8,fp8,0,0.012388800084590913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,2,128,1,float16,float16,0,0.012041600048542022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,2,128,1,float16,fp8,0,0.01233920007944107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,2,128,1,fp8,fp8,0,0.01239359974861145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,4,128,1,float16,float16,0,0.01223199963569641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,4,128,1,float16,fp8,0,0.012432000041007996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,4,128,1,fp8,fp8,0,0.012620800733566284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,8,128,1,float16,float16,0,0.012084800004959106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,8,128,1,float16,fp8,0,0.012547199428081513
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,8,128,1,fp8,fp8,0,0.012436799705028534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,64,128,1,float16,float16,0,0.011847999691963196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,64,128,1,float16,fp8,0,0.01077599972486496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,64,128,1,fp8,fp8,0,0.01101439967751503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,1,128,1,float16,float16,0,0.011804799735546111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,1,128,1,float16,fp8,0,0.012015999853610992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,1,128,1,fp8,fp8,0,0.012275200337171555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,2,128,1,float16,float16,0,0.011819200217723846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,2,128,1,float16,fp8,0,0.012345600128173827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,2,128,1,fp8,fp8,0,0.012171199917793274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,4,128,1,float16,float16,0,0.011734399944543839
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,4,128,1,float16,fp8,0,0.012003199756145477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,4,128,1,fp8,fp8,0,0.01218239963054657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,8,128,1,float16,float16,0,0.011798399686813354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,8,128,1,float16,fp8,0,0.011937599629163742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,64,8,128,1,fp8,fp8,0,0.01202400028705597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,1,128,1,float16,float16,0,0.09472000002861022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,1,128,1,float16,fp8,0,0.10162880420684814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,1,128,1,fp8,fp8,0,0.10081599950790406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,1,128,1,float16,float16,0,0.012740799784660339
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,64,2,128,1,float16,fp8,0,0.012966400384902954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,2,128,1,float16,float16,0,0.09570720195770263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,2,128,1,float16,fp8,0,0.10162240266799927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,2,128,1,fp8,fp8,0,0.10245920419692993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,4,128,1,float16,float16,0,0.09830399751663207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,4,128,1,float16,fp8,0,0.1025231957435608
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,4,128,1,fp8,fp8,0,0.1032863974571228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,8,128,1,float16,fp8,0,0.10490239858627319
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,1,128,1,float16,float16,0,0.012142399698495865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,8,128,1,fp8,fp8,0,0.10489280223846435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,64,128,1,float16,float16,0,0.22513918876647948
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,64,128,1,float16,fp8,0,0.19431519508361816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,64,128,1,fp8,fp8,0,0.19409760236740112
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,1,128,1,float16,fp8,0,0.05610079765319824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,64,8,128,1,float16,fp8,0,0.012759999930858612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,2,128,1,float16,float16,0,0.056417602300643924
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,2,128,1,float16,fp8,0,0.05661600232124329
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,4,128,1,float16,float16,0,0.057004797458648684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,4,128,1,float16,fp8,0,0.056910401582717894
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,4,128,1,fp8,fp8,0,0.05686560273170471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,8,128,1,float16,float16,0,0.05955359935760498
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,8,128,1,float16,fp8,0,0.058911997079849246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,8,128,1,fp8,fp8,0,0.05881279706954956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,64,128,1,float16,float16,0,0.11864639520645141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,64,128,1,float16,fp8,0,0.10271199941635131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,64,128,1,fp8,fp8,0,0.10222879648208619
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,1,128,1,float16,float16,0,0.027432000637054442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,1,128,1,float16,fp8,0,0.028443199396133424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,1,128,1,fp8,fp8,0,0.028489598631858827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,2,128,1,float16,float16,0,0.02770879864692688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,2,128,1,float16,fp8,0,0.028401601314544677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,64,8,128,1,float16,float16,0,0.10221439599990845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,2,128,1,fp8,fp8,0,0.028398400545120238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,4,128,1,float16,float16,0,0.027694401144981385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,4,128,1,float16,fp8,0,0.028545600175857545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,4,128,1,fp8,fp8,0,0.028454399108886717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,8,128,1,float16,float16,0,0.02967360019683838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,8,128,1,float16,fp8,0,0.028683200478553772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,64,8,128,1,fp8,fp8,0,0.028859201073646545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,64,128,1,float16,float16,0,0.06415839791297913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,64,128,1,float16,fp8,0,0.05060160160064697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,64,128,1,fp8,fp8,0,0.04975680112838745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,1,128,1,float16,float16,0,0.05630559921264648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,1,128,1,float16,fp8,0,0.01818400025367737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,1,128,1,fp8,fp8,0,0.01836320012807846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,64,64,128,1,float16,float16,0,0.011662399768829346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,2,128,1,float16,fp8,0,0.018267199397087097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,2,128,1,fp8,fp8,0,0.05618240237236023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,2,128,1,fp8,fp8,0,0.018134400248527527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,4,128,1,float16,fp8,0,0.018262399733066557
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,8,128,1,float16,float16,0,0.0179407998919487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,8,128,1,float16,fp8,0,0.01834080070257187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,8,128,1,fp8,fp8,0,0.018361599743366243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,64,128,1,float16,float16,0,0.030856001377105712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,64,128,1,float16,fp8,0,0.029073598980903625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,64,128,1,fp8,fp8,0,0.029104000329971312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,1,128,1,float16,float16,0,0.012803199887275695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,1,128,1,float16,fp8,0,0.013079999387264252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,1,128,1,fp8,fp8,0,0.012923200428485871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,2,128,1,float16,float16,0,0.01268479973077774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,2,128,1,float16,fp8,0,0.012921600043773651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,2,128,1,fp8,fp8,0,0.012988799810409546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,4,128,1,float16,float16,0,0.012889599800109864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,4,128,1,float16,fp8,0,0.012967999279499053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,4,128,1,fp8,fp8,0,0.012993599474430084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,8,128,1,float16,fp8,0,0.013033600151538849
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,8,128,1,fp8,fp8,0,0.013094399869441987
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,64,128,1,float16,float16,0,0.020172800123691558
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,64,128,1,float16,fp8,0,0.018379199504852294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,64,128,1,fp8,fp8,0,0.01839040070772171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,64,1,128,1,fp8,fp8,0,0.028110399842262268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,1,128,1,float16,float16,0,0.012144000083208085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,1,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,2,128,1,float16,float16,0,0.012110400199890136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,2,128,1,float16,fp8,0,0.012571200728416443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,64,1,128,1,fp8,fp8,0,0.05602880120277405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,2,128,1,fp8,fp8,0,0.012700800597667695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,4,128,1,float16,float16,0,0.012151999771595002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,4,128,1,float16,fp8,0,0.012619200348854064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,4,128,1,fp8,fp8,0,0.012577599287033081
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,4,128,1,float16,float16,0,0.017867200076580048
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,8,128,1,float16,fp8,0,0.012694400548934937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,4,128,1,fp8,fp8,0,0.0183119997382164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,64,128,1,float16,float16,0,0.014508800208568573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,64,128,1,fp8,fp8,0,0.013390399515628815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,1,128,1,float16,float16,0,0.012068799883127212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,1,128,1,float16,fp8,0,0.012451200187206269
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,1,128,1,fp8,fp8,0,0.012291199713945388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,2,128,1,float16,float16,0,0.01191840022802353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,2,128,1,float16,fp8,0,0.012278400361537933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,2,128,1,fp8,fp8,0,0.012412799894809723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,4,128,1,float16,float16,0,0.012121599912643433
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,4,128,1,float16,fp8,0,0.012425599992275238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,4,128,1,fp8,fp8,0,0.012174399942159653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,8,128,1,float16,float16,0,0.011958400160074234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,8,128,1,float16,fp8,0,0.012374400347471236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,8,128,1,fp8,fp8,0,0.012326399981975555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,64,128,1,float16,float16,0,0.011795199662446975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,64,8,128,1,float16,float16,0,0.012895999848842621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,64,128,1,float16,fp8,0,0.010856000334024429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,64,128,1,fp8,fp8,0,0.010945600271224976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,1,128,1,float16,float16,0,0.011907199770212174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,1,128,1,float16,fp8,0,0.01202080026268959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,1,128,1,fp8,fp8,0,0.012031999975442886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,2,128,1,float16,float16,0,0.011947199702262878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,2,128,1,float16,fp8,0,0.012358400225639343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,2,128,1,fp8,fp8,0,0.012185599654912949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,1,128,1,float16,fp8,0,0.012569600343704223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,4,128,1,float16,float16,0,0.011964800208806992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,4,128,1,float16,fp8,0,0.012151999771595002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,4,128,1,fp8,fp8,0,0.012219200283288956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,8,128,1,float16,float16,0,0.01157120019197464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,8,128,1,float16,fp8,0,0.011990399658679962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,64,8,128,1,fp8,fp8,0,0.01186240017414093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,64,128,1,float16,float16,0,0.01154559999704361
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,64,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,64,128,1,fp8,fp8,0,0.010694400221109391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,1,128,1,float16,float16,0,0.011422400176525117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,8,128,1,float16,float16,0,0.012495999783277511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,1,128,1,float16,fp8,0,0.012064000219106674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,1,128,1,fp8,fp8,0,0.01186719983816147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,2,128,1,float16,float16,0,0.011726400256156922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,2,128,1,float16,fp8,0,0.011739200353622437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,2,128,1,fp8,fp8,0,0.011939200013875962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,4,128,1,float16,float16,0,0.01162080019712448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,4,128,1,float16,fp8,0,0.011843200027942657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,64,8,128,1,fp8,fp8,0,0.012547199428081513
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,8,128,1,float16,float16,0,0.01162080019712448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,64,64,128,1,float16,fp8,0,0.01343040019273758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,8,128,1,fp8,fp8,0,0.012006399780511856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,1,128,1,fp8,fp8,0,0.01835840046405792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,2,128,1,float16,float16,0,0.023873600363731384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,2,128,1,float16,fp8,0,0.023691199719905853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,2,128,1,fp8,fp8,0,0.023686400055885314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,4,128,1,float16,float16,0,0.035076799988746646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,4,128,1,float16,fp8,0,0.034745600819587705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,4,128,1,fp8,fp8,0,0.03475199937820435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,8,128,1,float16,float16,0,0.05654240250587463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,8,128,1,float16,fp8,0,0.05623040199279785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,8,128,1,fp8,fp8,0,0.05636320114135742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,64,128,1,float16,float16,0,0.16504000425338744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,64,128,1,float16,fp8,0,0.16061439514160156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,64,128,1,fp8,fp8,0,0.16060320138931275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,1,128,1,float16,float16,0,0.012807999551296235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,1,128,1,float16,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,1,128,1,fp8,fp8,0,0.012700800597667695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,2,128,1,float16,float16,0,0.01552799940109253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,1,128,1,float16,float16,0,0.017688000202178956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,2,128,1,float16,fp8,0,0.015412800014019012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,2,128,1,fp8,fp8,0,0.015343999862670899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,4,128,1,float16,float16,0,0.021206399798393248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,4,128,1,float16,fp8,0,0.0209647998213768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,4,128,1,fp8,fp8,0,0.020980800688266753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,8,128,1,float16,float16,0,0.032364800572395325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,8,128,1,float16,fp8,0,0.031851199269294736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,4,128,1,fp8,fp8,0,0.0119439996778965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,64,8,128,1,float16,fp8,0,0.011961600184440613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,1,128,1,float16,float16,0,0.018087999522686006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,64,128,1,float16,fp8,0,0.08482559919357299
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,64,128,1,fp8,fp8,0,0.08507360219955444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,64,1,128,1,float16,fp8,0,0.01807679980993271
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,1,128,1,float16,fp8,0,0.011104000359773636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,1,128,1,fp8,fp8,0,0.011310400068759918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,2,128,1,float16,fp8,0,0.011327999830245971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,2,128,1,float16,float16,0,0.011369600147008895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,2,128,1,fp8,fp8,0,0.011611200124025344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,4,128,1,float16,float16,0,0.013899199664592743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,4,128,1,fp8,fp8,0,0.014110399782657624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,8,128,1,float16,float16,0,0.019419200718402863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,8,128,1,float16,fp8,0,0.019678400456905366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,8,128,1,fp8,fp8,0,0.019099199771881105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,64,128,1,float16,float16,0,0.04899680018424988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,64,128,1,float16,fp8,0,0.04736000001430511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,64,128,1,fp8,fp8,0,0.047700798511505126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,1,128,1,float16,float16,0,0.01032159999012947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,1,128,1,float16,fp8,0,0.010791999846696853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,1,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,2,128,1,float16,float16,0,0.011036799848079681
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,2,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,2,128,1,fp8,fp8,0,0.010790400207042694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,4,128,1,float16,float16,0,0.010689599812030793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,4,128,1,float16,fp8,0,0.010822399705648422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,4,128,1,fp8,fp8,0,0.01066880002617836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,8,128,1,float16,float16,0,0.013494400680065155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,8,128,1,float16,fp8,0,0.013099199533462525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,64,8,128,1,fp8,fp8,0,0.01329279989004135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,64,128,1,float16,float16,0,0.029083201289176942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,64,128,1,float16,fp8,0,0.02783840000629425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,64,128,1,fp8,fp8,0,0.02794399857521057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,1,128,1,float16,float16,0,0.01005759984254837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,1,128,1,float16,fp8,0,0.009796799719333648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,1,128,1,fp8,fp8,0,0.009940800070762635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,2,128,1,float16,float16,0,0.010097599774599075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,2,128,1,float16,fp8,0,0.009944000095129014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,2,128,1,fp8,fp8,0,0.009990400075912476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,4,128,1,float16,float16,0,0.01027040034532547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,4,128,1,float16,fp8,0,0.010025600343942643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,4,128,1,fp8,fp8,0,0.010016000270843506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,8,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,8,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,64,128,1,float16,float16,0,0.019388799369335175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,64,128,1,float16,fp8,0,0.018104000389575957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,64,128,1,fp8,fp8,0,0.017979200184345245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,1,128,1,float16,float16,0,0.010091199725866317
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,1,128,1,float16,fp8,0,0.009859199821949004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,64,2,128,1,float16,float16,0,0.017632000148296356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,2,128,1,float16,float16,0,0.010011199861764908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,2,128,1,float16,fp8,0,0.00979520007967949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,2,128,1,fp8,fp8,0,0.009769599884748459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,64,8,128,1,fp8,fp8,0,0.03172959983348846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,64,128,1,float16,float16,0,0.08773919939994812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,4,128,1,float16,fp8,0,0.009772799909114838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,8,128,1,float16,float16,0,0.010089600086212158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,8,128,1,float16,fp8,0,0.00995519980788231
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,1,128,1,float16,float16,0,0.011308799684047698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,8,128,1,fp8,fp8,0,0.009971199929714203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,64,128,1,float16,float16,0,0.014139199256896972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,64,128,1,float16,fp8,0,0.013023999333381654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,64,4,128,1,float16,fp8,0,0.013963200151920319
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,64,128,1,fp8,fp8,0,0.013393600285053254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,1,128,1,float16,float16,0,0.009494400024414063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,1,128,1,float16,fp8,0,0.009244800359010697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,1,128,1,fp8,fp8,0,0.009486400336027146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,2,128,1,float16,float16,0,0.009623999893665313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,2,128,1,float16,fp8,0,0.009131199866533279
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,2,128,1,fp8,fp8,0,0.009431999921798707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,4,128,1,float16,float16,0,0.009824000298976898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,4,128,1,float16,fp8,0,0.009539200365543366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,4,128,1,fp8,fp8,0,0.009719999879598618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,8,128,1,float16,fp8,0,0.009729599952697754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,8,128,1,fp8,fp8,0,0.009808000177145004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,64,128,1,float16,float16,0,0.011374399811029435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,64,128,1,float16,fp8,0,0.010667199641466141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,64,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,1,128,1,float16,float16,0,0.009771200269460678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,1,128,1,float16,fp8,0,0.009513600170612336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,1,128,1,fp8,fp8,0,0.009516800194978714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,2,128,1,float16,float16,0,0.009574399888515472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,2,128,1,float16,fp8,0,0.0094991996884346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,2,128,1,fp8,fp8,0,0.009275200217962265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,4,128,1,float16,float16,0,0.009654399752616883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,4,128,1,float16,fp8,0,0.009233599901199341
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,4,128,1,fp8,fp8,0,0.009334400296211243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,8,128,1,float16,float16,0,0.009614399820566177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,8,128,1,float16,fp8,0,0.009728000313043595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,64,8,128,1,fp8,fp8,0,0.009753599762916565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,64,128,1,float16,float16,0,0.011427199840545655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,64,128,1,float16,fp8,0,0.010550399869680404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,64,128,1,fp8,fp8,0,0.010620799660682679
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,1,128,1,float16,float16,0,0.009745600074529648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,1,128,1,float16,fp8,0,0.00955360010266304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,1,128,1,fp8,fp8,0,0.00955199971795082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,64,8,128,1,float16,fp8,0,0.010239999741315842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,2,128,1,float16,fp8,0,0.00952799990773201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,2,128,1,fp8,fp8,0,0.009462399780750275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,4,128,1,float16,float16,0,0.009585600346326828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,4,128,1,float16,fp8,0,0.009279999881982803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,4,128,1,fp8,fp8,0,0.00929120033979416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,8,128,1,float16,float16,0,0.009331200271844864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,8,128,1,float16,fp8,0,0.009292799979448318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,8,128,1,fp8,fp8,0,0.009294400364160538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,1,128,1,fp8,fp8,0,0.009851200133562088
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,4,128,1,float16,float16,0,0.009809599816799163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,64,4,128,1,fp8,fp8,0,0.009859199821949004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,64,8,128,1,float16,float16,0,0.009825599938631057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,64,2,128,1,float16,float16,0,0.009747199714183807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,1,128,1,float16,fp8,0,36.32100830078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,1,128,1,fp8,fp8,0,36.34034729003906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,2,128,1,float16,fp8,0,35.99068908691406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,2,128,1,fp8,fp8,0,35.66346435546875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,4,128,1,float16,fp8,0,36.235968017578124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,1,128,1,float16,float16,0,45.53132019042969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,2,128,1,float16,float16,0,46.888937377929686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,4,128,1,float16,float16,0,46.72184448242187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,48,128,1,float16,float16,0,23.90405731201172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,1,128,1,float16,float16,0,22.735792541503905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,4,128,1,fp8,fp8,0,36.8764892578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,8,128,1,float16,fp8,0,36.2961669921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,8,128,1,fp8,fp8,0,35.955178833007814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,1,128,1,float16,fp8,0,18.033018493652342
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,48,8,128,1,float16,float16,0,47.22298583984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,1,128,1,fp8,fp8,0,18.166897583007813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,2,128,1,float16,fp8,0,18.126139831542968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,2,128,1,fp8,fp8,0,18.00230255126953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,2,128,1,float16,float16,0,23.45050506591797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,4,128,1,float16,fp8,0,18.18666229248047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,4,128,1,float16,float16,0,23.370639038085937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,4,128,1,fp8,fp8,0,18.266799926757812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,8,128,1,float16,fp8,0,18.0268798828125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,48,128,1,float16,float16,0,12.364059448242188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,8,128,1,fp8,fp8,0,18.49828186035156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,8,128,1,float16,float16,0,23.393289184570314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,1,128,1,float16,float16,0,11.641891479492188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,1,128,1,float16,fp8,0,9.26326904296875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,1,128,1,fp8,fp8,0,9.256902313232422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,2,128,1,float16,float16,0,11.27726058959961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,2,128,1,float16,fp8,0,8.859028625488282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,2,128,1,fp8,fp8,0,9.109180450439453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,4,128,1,float16,float16,0,12.087062072753906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,4,128,1,float16,fp8,0,8.944316864013672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,4,128,1,fp8,fp8,0,8.879443359375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,8,128,1,float16,fp8,0,8.934374237060547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,48,128,1,float16,float16,0,5.718235015869141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,8,128,1,float16,float16,0,11.678421020507812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,8,128,1,fp8,fp8,0,8.941712188720704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,1,128,1,float16,fp8,0,4.501174545288086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,1,128,1,float16,float16,0,5.745115280151367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,1,128,1,fp8,fp8,0,4.57946548461914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,2,128,1,float16,float16,0,5.749521636962891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,2,128,1,float16,fp8,0,4.4868721008300785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,2,128,1,fp8,fp8,0,4.632974243164062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,4,128,1,float16,float16,0,5.809345626831055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,4,128,1,float16,fp8,0,4.575001525878906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,4,128,1,fp8,fp8,0,4.514689636230469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,8,128,1,float16,float16,0,6.069160079956054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,8,128,1,float16,fp8,0,4.438870239257812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,8,128,1,fp8,fp8,0,4.676153564453125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,1,128,1,float16,fp8,0,21.087767028808592
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,1,128,1,float16,float16,0,25.881549072265624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,1,128,1,fp8,fp8,0,21.007049560546875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,48,128,1,float16,fp8,0,4.513598251342773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,48,128,1,fp8,fp8,0,9.206752014160156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,48,48,128,1,fp8,fp8,0,4.569060897827148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,2,128,1,float16,float16,0,25.631134033203125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,48,48,128,1,float16,fp8,0,9.153196716308594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,48,128,1,float16,fp8,0,18.293408203125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,48,48,128,1,fp8,fp8,0,18.644686889648437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,2,128,1,float16,fp8,0,21.20338592529297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,2,128,1,fp8,fp8,0,20.167906188964842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,4,128,1,float16,fp8,0,20.697654724121094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,4,128,1,fp8,fp8,0,20.546903991699217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,4,128,1,float16,float16,0,26.667388916015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,8,128,1,float16,float16,0,26.42757568359375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,48,128,1,float16,fp8,0,11.105115509033203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,8,128,1,float16,fp8,0,21.121951293945312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,48,128,1,float16,float16,0,14.340771484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,48,128,1,fp8,fp8,0,11.196115112304687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,48,8,128,1,fp8,fp8,0,21.58238067626953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,1,128,1,float16,float16,0,12.87099609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,1,128,1,float16,fp8,0,10.252259063720704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,1,128,1,fp8,fp8,0,10.174585723876953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,2,128,1,float16,fp8,0,10.105010986328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,2,128,1,fp8,fp8,0,10.38610382080078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,2,128,1,float16,float16,0,13.222921752929688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,4,128,1,float16,fp8,0,10.356006622314453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,4,128,1,fp8,fp8,0,10.218241882324218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,4,128,1,float16,float16,0,13.230191040039063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,48,128,1,float16,fp8,0,5.279064178466797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,48,128,1,float16,float16,0,7.04195556640625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,8,128,1,float16,fp8,0,10.300233459472656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,48,128,1,fp8,fp8,0,5.387782287597656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,8,128,1,float16,float16,0,13.209791564941407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,48,8,128,1,fp8,fp8,0,10.219757080078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,1,128,1,float16,fp8,0,5.173654556274414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,1,128,1,fp8,fp8,0,5.026776123046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,2,128,1,float16,fp8,0,5.079244613647461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,2,128,1,float16,float16,0,6.553294372558594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,2,128,1,fp8,fp8,0,5.137726211547852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,1,128,1,float16,float16,0,6.498117065429687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,4,128,1,float16,fp8,0,5.0504638671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,4,128,1,float16,float16,0,6.484824371337891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,4,128,1,fp8,fp8,0,5.140206527709961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,48,128,1,fp8,fp8,0,2.6121679306030274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,48,128,1,float16,fp8,0,2.5797296524047852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,48,128,1,float16,float16,0,3.4351760864257814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,8,128,1,float16,float16,0,6.515878295898437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,1,128,1,float16,fp8,0,2.518087959289551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,8,128,1,float16,fp8,0,5.112580871582031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,1,128,1,float16,float16,0,3.187446403503418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,48,8,128,1,fp8,fp8,0,5.227803039550781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,1,128,1,fp8,fp8,0,2.546072006225586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,2,128,1,float16,float16,0,3.0650896072387694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,2,128,1,float16,fp8,0,2.833839988708496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,2,128,1,fp8,fp8,0,2.6602975845336916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,4,128,1,float16,fp8,0,2.538862419128418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,4,128,1,float16,float16,0,3.1940975189208984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,4,128,1,fp8,fp8,0,2.757423973083496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,8,128,1,float16,float16,0,3.2666881561279295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,8,128,1,float16,fp8,0,2.622808074951172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,48,8,128,1,fp8,fp8,0,2.668854331970215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,1,128,1,float16,fp8,0,14.617393493652344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,1,128,1,fp8,fp8,0,14.612559509277343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,2,128,1,float16,fp8,0,14.57623748779297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,4,128,1,float16,fp8,0,14.199981689453125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,2,128,1,fp8,fp8,0,14.788226318359374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,1,128,1,float16,float16,0,19.11690673828125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,2,128,1,float16,float16,0,18.82042999267578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,4,128,1,float16,float16,0,18.353073120117188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,48,128,1,float16,float16,0,9.627302551269532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,48,128,1,float16,fp8,0,7.5883949279785154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,48,128,1,fp8,fp8,0,7.634400177001953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,4,128,1,fp8,fp8,0,14.363238525390624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,1,128,1,float16,float16,0,8.876793670654298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,8,128,1,float16,fp8,0,14.5720947265625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,8,128,1,fp8,fp8,0,14.949470520019531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,48,8,128,1,float16,float16,0,18.594786071777342
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,1,128,1,fp8,fp8,0,7.16094741821289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,1,128,1,float16,fp8,0,7.327767944335937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,2,128,1,float16,fp8,0,7.09970703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,2,128,1,fp8,fp8,0,7.0997261047363285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,2,128,1,float16,float16,0,9.189842987060548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,4,128,1,float16,fp8,0,7.407288360595703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,4,128,1,float16,float16,0,9.740402984619141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,48,128,1,float16,float16,0,4.714425659179687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,48,128,1,float16,fp8,0,3.784312057495117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,4,128,1,fp8,fp8,0,7.150644683837891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,48,128,1,fp8,fp8,0,3.8545505523681642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,8,128,1,fp8,fp8,0,7.100739288330078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,8,128,1,float16,fp8,0,7.103192138671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,48,8,128,1,float16,float16,0,9.134053039550782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,1,128,1,float16,fp8,0,3.4916847229003904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,1,128,1,fp8,fp8,0,3.6234222412109376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,1,128,1,float16,float16,0,4.503084945678711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,2,128,1,float16,fp8,0,3.5865455627441407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,2,128,1,float16,float16,0,4.432977676391602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,2,128,1,fp8,fp8,0,3.524534225463867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,4,128,1,float16,float16,0,4.472758483886719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,4,128,1,float16,fp8,0,3.6868785858154296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,48,128,1,float16,float16,0,2.38428955078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,4,128,1,fp8,fp8,0,3.503972625732422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,48,128,1,float16,fp8,0,1.8369199752807617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,8,128,1,fp8,fp8,0,3.6008880615234373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,8,128,1,float16,fp8,0,3.6490814208984377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,48,8,128,1,float16,float16,0,4.706497573852539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,48,128,1,fp8,fp8,0,1.983750343322754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,1,128,1,float16,fp8,0,1.8863056182861329
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,1,128,1,fp8,fp8,0,1.7511968612670898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,1,128,1,float16,float16,0,2.093326377868652
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,2,128,1,float16,float16,0,2.095787239074707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,2,128,1,fp8,fp8,0,1.7678352355957032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,4,128,1,float16,fp8,0,1.7869407653808593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,4,128,1,float16,float16,0,2.2848127365112303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,4,128,1,fp8,fp8,0,1.7663856506347657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,8,128,1,float16,fp8,0,1.751793670654297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,8,128,1,float16,float16,0,2.2783344268798826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,8,128,1,fp8,fp8,0,1.9173040390014648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,48,2,128,1,float16,fp8,0,1.7898031234741212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,1,128,1,float16,fp8,0,18.58748016357422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,2,128,1,float16,fp8,0,18.379005432128906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,1,128,1,fp8,fp8,0,18.97959747314453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,2,128,1,fp8,fp8,0,19.260504150390624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,4,128,1,float16,fp8,0,18.738372802734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,1,128,1,float16,float16,0,24.448220825195314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,2,128,1,float16,float16,0,24.432859802246092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,4,128,1,float16,float16,0,24.377757263183593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,48,128,1,float16,fp8,0,9.957360076904298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,48,128,1,float16,float16,0,12.681098937988281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,48,128,1,fp8,fp8,0,9.840052795410156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,1,128,1,float16,float16,0,11.56409149169922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,4,128,1,fp8,fp8,0,18.862028503417967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,8,128,1,float16,fp8,0,19.548487854003906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,8,128,1,fp8,fp8,0,19.42473449707031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,48,8,128,1,float16,float16,0,24.39116973876953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,1,128,1,float16,fp8,0,9.320478057861328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,1,128,1,fp8,fp8,0,9.620606231689454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,2,128,1,float16,fp8,0,9.231999969482422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,2,128,1,float16,float16,0,11.768598175048828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,2,128,1,fp8,fp8,0,9.272561645507812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,4,128,1,float16,fp8,0,9.47616958618164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,4,128,1,float16,float16,0,12.167861175537109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,48,128,1,float16,fp8,0,4.913796615600586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,48,128,1,float16,float16,0,6.415974426269531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,4,128,1,fp8,fp8,0,9.231699371337891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,8,128,1,float16,fp8,0,9.401358032226563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,48,128,1,fp8,fp8,0,5.117940902709961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,8,128,1,float16,float16,0,12.33951187133789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,48,8,128,1,fp8,fp8,0,9.4531005859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,1,128,1,float16,fp8,0,4.708518218994141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,1,128,1,float16,float16,0,6.020321655273437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,1,128,1,fp8,fp8,0,4.735851287841797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,2,128,1,float16,fp8,0,4.631592178344727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,2,128,1,float16,float16,0,5.737790298461914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,2,128,1,fp8,fp8,0,4.7809089660644535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,4,128,1,float16,fp8,0,4.792652893066406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,4,128,1,fp8,fp8,0,4.680543899536133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,4,128,1,float16,float16,0,6.369849777221679
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,48,128,1,float16,fp8,0,2.4717056274414064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,8,128,1,float16,fp8,0,4.7792705535888675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,48,128,1,fp8,fp8,0,2.462513542175293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,8,128,1,float16,float16,0,6.083832168579102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,48,8,128,1,fp8,fp8,0,4.704235076904297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,1,128,1,float16,float16,0,2.7241119384765624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,1,128,1,float16,fp8,0,2.4634815216064454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,1,128,1,fp8,fp8,0,2.3620800018310546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,48,128,1,float16,float16,0,3.0219648361206053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,2,128,1,float16,fp8,0,2.294361686706543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,2,128,1,float16,float16,0,2.856420707702637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,2,128,1,fp8,fp8,0,2.399051284790039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,4,128,1,float16,float16,0,2.818716812133789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,4,128,1,fp8,fp8,0,2.3142160415649413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,4,128,1,float16,fp8,0,2.6678512573242186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,48,128,1,float16,fp8,0,1.2211008071899414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,48,128,1,float16,float16,0,1.5683039665222167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,8,128,1,float16,float16,0,2.9967456817626954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,48,128,1,fp8,fp8,0,1.3075360298156737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,8,128,1,fp8,fp8,0,2.3056095123291014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,1,128,1,float16,float16,0,1.3601951599121094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,1,128,1,float16,fp8,0,1.197060775756836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,1,128,1,fp8,fp8,0,1.185041618347168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,2,128,1,float16,fp8,0,1.3085439682006836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,2,128,1,float16,float16,0,1.3765664100646973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,4,128,1,float16,float16,0,1.3145376205444337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,4,128,1,float16,fp8,0,1.271731185913086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,4,128,1,fp8,fp8,0,1.152233600616455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,8,128,1,float16,fp8,0,1.2471327781677246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,8,128,1,float16,float16,0,1.3511152267456055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,8,128,1,fp8,fp8,0,1.2459471702575684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,48,8,128,1,float16,fp8,0,2.2782543182373045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,48,2,128,1,fp8,fp8,0,1.1704015731811523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,1,128,1,float16,fp8,0,10.875049591064453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,1,128,1,fp8,fp8,0,10.745961761474609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,2,128,1,float16,fp8,0,10.701058959960937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,2,128,1,fp8,fp8,0,10.585892486572266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,1,128,1,float16,float16,0,13.634556579589844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,2,128,1,float16,float16,0,13.896029663085937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,4,128,1,float16,float16,0,13.737063598632812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,4,128,1,float16,fp8,0,10.640064239501953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,48,128,1,float16,fp8,0,5.82087516784668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,48,128,1,float16,float16,0,7.543972778320312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,4,128,1,fp8,fp8,0,11.202222442626953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,48,128,1,fp8,fp8,0,5.739510345458984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,8,128,1,float16,fp8,0,10.858977508544921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,8,128,1,fp8,fp8,0,10.745646667480468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,1,128,1,float16,float16,0,6.672962951660156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,48,8,128,1,float16,float16,0,14.205278015136718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,1,128,1,float16,fp8,0,5.41496467590332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,1,128,1,fp8,fp8,0,5.534075164794922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,2,128,1,float16,fp8,0,5.367457580566406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,2,128,1,fp8,fp8,0,5.434822463989258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,2,128,1,float16,float16,0,7.171497344970703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,4,128,1,float16,float16,0,6.638081359863281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,4,128,1,float16,fp8,0,5.398600006103516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,4,128,1,fp8,fp8,0,5.403665542602539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,48,128,1,float16,float16,0,3.733871841430664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,8,128,1,float16,fp8,0,5.568608093261719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,48,128,1,fp8,fp8,0,2.8951183319091798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,8,128,1,float16,float16,0,7.200918579101563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,48,8,128,1,fp8,fp8,0,5.4225921630859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,1,128,1,float16,float16,0,3.113947105407715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,1,128,1,float16,fp8,0,2.642078399658203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,1,128,1,fp8,fp8,0,2.787892723083496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,2,128,1,float16,float16,0,3.3736736297607424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,48,128,1,float16,fp8,0,3.113137626647949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,2,128,1,fp8,fp8,0,2.8482255935668945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,4,128,1,float16,fp8,0,2.7680383682250977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,4,128,1,fp8,fp8,0,2.682360076904297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,4,128,1,float16,float16,0,3.4426929473876955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,48,128,1,float16,fp8,0,1.402017593383789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,48,128,1,float16,float16,0,1.7520624160766602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,48,128,1,fp8,fp8,0,1.4308239936828613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,8,128,1,float16,float16,0,3.350128173828125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,8,128,1,float16,fp8,0,2.8408735275268553
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,8,128,1,fp8,fp8,0,2.6578319549560545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,1,128,1,float16,float16,0,1.833768081665039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,1,128,1,float16,fp8,0,1.3154111862182618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,48,2,128,1,float16,fp8,0,2.744273567199707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,1,128,1,fp8,fp8,0,1.4879471778869628
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,2,128,1,float16,float16,0,1.5499216079711915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,2,128,1,fp8,fp8,0,1.3363696098327638
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,4,128,1,float16,fp8,0,1.4974896430969238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,4,128,1,fp8,fp8,0,1.3490639686584474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,8,128,1,float16,float16,0,1.5103343963623046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,48,128,1,float16,float16,0,0.8694512367248535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,8,128,1,float16,fp8,0,1.3733471870422362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,48,128,1,float16,fp8,0,0.7356160163879395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,8,128,1,fp8,fp8,0,1.4542192459106444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,48,128,1,fp8,fp8,0,0.7367087841033936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,1,128,1,float16,fp8,0,0.6764463901519775
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,1,128,1,fp8,fp8,0,0.6744703769683837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,2,128,1,float16,float16,0,0.7701839923858642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,2,128,1,float16,fp8,0,0.6931503772735595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,2,128,1,fp8,fp8,0,0.7090288162231445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,4,128,1,float16,float16,0,0.7695680141448975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,4,128,1,float16,fp8,0,0.667303991317749
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,4,128,1,fp8,fp8,0,0.7208447933197022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,8,128,1,float16,float16,0,0.770691204071045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,8,128,1,float16,fp8,0,0.7015151977539062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,2,128,1,float16,fp8,0,1.4921248435974122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,8,128,1,fp8,fp8,0,0.7423871994018555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,48,4,128,1,float16,float16,0,1.5585007667541504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,48,1,128,1,float16,float16,0,0.8097135543823242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,1,128,1,float16,fp8,0,9.974214172363281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,1,128,1,fp8,fp8,0,9.927641296386719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,2,128,1,float16,fp8,0,9.927458953857421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,2,128,1,fp8,fp8,0,10.094468688964843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,1,128,1,float16,float16,0,12.780599975585938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,4,128,1,float16,fp8,0,9.939218902587891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,2,128,1,float16,float16,0,12.67252197265625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,4,128,1,float16,float16,0,12.792002868652343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,48,128,1,float16,fp8,0,5.393929672241211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,48,128,1,fp8,fp8,0,5.469507217407227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,48,128,1,float16,float16,0,7.148953247070312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,1,128,1,float16,float16,0,6.247260665893554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,4,128,1,fp8,fp8,0,10.159979248046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,8,128,1,float16,fp8,0,10.060345458984376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,8,128,1,fp8,fp8,0,10.46162109375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,48,8,128,1,float16,float16,0,13.178321838378906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,1,128,1,float16,fp8,0,4.904032135009766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,1,128,1,fp8,fp8,0,4.896551895141601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,2,128,1,float16,fp8,0,5.051945495605469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,2,128,1,fp8,fp8,0,4.943600082397461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,2,128,1,float16,float16,0,6.514524841308594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,4,128,1,float16,fp8,0,5.081353759765625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,4,128,1,float16,float16,0,6.1816974639892575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,4,128,1,fp8,fp8,0,5.059569549560547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,48,128,1,float16,fp8,0,2.7874080657958986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,48,128,1,fp8,fp8,0,2.748478317260742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,48,128,1,float16,float16,0,3.5718257904052733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,8,128,1,float16,float16,0,6.386886215209961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,8,128,1,float16,fp8,0,5.124351882934571
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,48,8,128,1,fp8,fp8,0,5.1864784240722654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,1,128,1,float16,float16,0,3.0053903579711916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,1,128,1,float16,fp8,0,2.5254495620727537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,1,128,1,fp8,fp8,0,2.4698207855224608
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,2,128,1,float16,fp8,0,2.5705024719238283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,2,128,1,fp8,fp8,0,2.5810192108154295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,2,128,1,float16,float16,0,3.2381038665771484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,4,128,1,float16,float16,0,3.211337661743164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,4,128,1,float16,fp8,0,2.6192672729492186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,48,128,1,float16,float16,0,1.894251251220703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,48,128,1,float16,fp8,0,1.5121968269348145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,4,128,1,fp8,fp8,0,2.4805471420288088
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,8,128,1,float16,fp8,0,2.496112060546875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,8,128,1,fp8,fp8,0,2.482480049133301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,48,128,1,fp8,fp8,0,1.6503551483154297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,1,128,1,float16,float16,0,1.4335663795471192
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,48,8,128,1,float16,float16,0,2.9931711196899413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,1,128,1,fp8,fp8,0,1.272043228149414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,1,128,1,float16,fp8,0,1.47914400100708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,2,128,1,float16,float16,0,1.4379055976867676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,2,128,1,fp8,fp8,0,1.247606372833252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,4,128,1,float16,float16,0,1.4422783851623535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,4,128,1,float16,fp8,0,1.3157440185546876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,4,128,1,fp8,fp8,0,1.4219792366027832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,48,128,1,float16,fp8,0,0.7029088020324707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,48,128,1,float16,float16,0,0.8005215644836425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,8,128,1,float16,float16,0,1.458687973022461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,48,128,1,fp8,fp8,0,0.6930831909179688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,8,128,1,float16,fp8,0,1.2766863822937011
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,8,128,1,fp8,fp8,0,1.3275263786315918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,1,128,1,float16,float16,0,0.7284351825714112
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,1,128,1,float16,fp8,0,0.6555295944213867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,2,128,1,float16,fp8,0,0.6240416049957276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,2,128,1,fp8,fp8,0,0.6228975772857666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,4,128,1,float16,float16,0,0.7338784217834473
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,4,128,1,float16,fp8,0,0.656931209564209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,4,128,1,fp8,fp8,0,0.6363247871398926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,8,128,1,float16,fp8,0,0.6260479927062989
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,8,128,1,float16,float16,0,0.7158751964569092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,48,128,1,float16,float16,0,0.4131616115570068
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,8,128,1,fp8,fp8,0,0.6423967838287353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,48,128,1,fp8,fp8,0,0.34235520362854005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,1,128,1,float16,float16,0,0.3599663972854614
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,1,128,1,float16,fp8,0,0.3191200017929077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,1,128,1,fp8,fp8,0,0.31788480281829834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,48,2,128,1,float16,fp8,0,1.2669599533081055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,2,128,1,float16,float16,0,0.3625920057296753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,2,128,1,float16,fp8,0,0.3262063980102539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,4,128,1,float16,float16,0,0.36582560539245607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,4,128,1,float16,fp8,0,0.3159647941589355
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,4,128,1,fp8,fp8,0,0.3172528028488159
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,8,128,1,float16,float16,0,0.3673775911331177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,8,128,1,float16,fp8,0,0.31712319850921633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,8,128,1,fp8,fp8,0,0.31784639358520506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,1,128,1,fp8,fp8,0,0.6168975830078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,48,2,128,1,float16,float16,0,0.795201587677002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,48,128,1,float16,fp8,0,0.34498560428619385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,48,2,128,1,fp8,fp8,0,0.31980319023132325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,1,128,1,float16,fp8,0,5.818643188476562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,1,128,1,fp8,fp8,0,5.879801559448242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,1,128,1,float16,float16,0,7.331900787353516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,2,128,1,float16,fp8,0,5.821255874633789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,2,128,1,float16,float16,0,7.217779541015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,2,128,1,fp8,fp8,0,5.889219284057617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,4,128,1,float16,fp8,0,5.846102523803711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,4,128,1,float16,float16,0,7.411466979980469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,48,128,1,float16,fp8,0,3.282640075683594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,48,128,1,float16,float16,0,4.115959930419922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,4,128,1,fp8,fp8,0,5.885464096069336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,48,128,1,fp8,fp8,0,3.238470458984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,8,128,1,fp8,fp8,0,5.929961776733398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,8,128,1,float16,float16,0,7.644737243652344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,1,128,1,float16,fp8,0,2.916315269470215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,1,128,1,fp8,fp8,0,3.010393524169922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,2,128,1,float16,float16,0,3.6126705169677735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,2,128,1,float16,fp8,0,2.906820869445801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,2,128,1,fp8,fp8,0,2.903166389465332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,1,128,1,float16,float16,0,3.6447486877441406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,48,8,128,1,float16,fp8,0,6.043016052246093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,4,128,1,float16,fp8,0,2.957872009277344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,4,128,1,float16,float16,0,3.754070281982422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,4,128,1,fp8,fp8,0,2.9152032852172853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,8,128,1,float16,fp8,0,2.9362064361572267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,48,128,1,float16,fp8,0,1.6213279724121095
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,48,128,1,fp8,fp8,0,1.6483631134033203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,48,128,1,float16,float16,0,1.964334487915039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,1,128,1,float16,float16,0,1.8799407958984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,8,128,1,float16,float16,0,3.777124786376953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,48,8,128,1,fp8,fp8,0,2.9530559539794923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,1,128,1,float16,fp8,0,1.4576895713806153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,1,128,1,fp8,fp8,0,1.4447903633117676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,2,128,1,float16,fp8,0,1.4439200401306151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,2,128,1,float16,float16,0,1.70623836517334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,2,128,1,fp8,fp8,0,1.5096287727355957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,4,128,1,float16,fp8,0,1.4643407821655274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,4,128,1,fp8,fp8,0,1.4574015617370606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,4,128,1,float16,float16,0,1.8394304275512696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,48,128,1,float16,float16,0,0.9864463806152344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,48,128,1,float16,fp8,0,0.81845121383667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,8,128,1,float16,float16,0,1.729520034790039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,48,128,1,fp8,fp8,0,0.816915225982666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,1,128,1,float16,float16,0,0.8981264114379883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,8,128,1,fp8,fp8,0,1.596083164215088
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,1,128,1,float16,fp8,0,0.7654032230377197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,1,128,1,fp8,fp8,0,0.7290688037872315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,2,128,1,float16,fp8,0,0.7307663917541504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,2,128,1,float16,float16,0,0.8978752136230469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,2,128,1,fp8,fp8,0,0.757041597366333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,4,128,1,float16,float16,0,0.9158080101013184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,4,128,1,float16,fp8,0,0.7488560199737548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,4,128,1,fp8,fp8,0,0.737007999420166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,8,128,1,float16,float16,0,0.8456288337707519
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,8,128,1,float16,fp8,0,0.7793327808380127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,48,8,128,1,fp8,fp8,0,0.7521503925323486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,48,128,1,float16,fp8,0,0.41693921089172364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,48,128,1,fp8,fp8,0,0.4147280216217041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,1,128,1,float16,fp8,0,0.377841591835022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,1,128,1,fp8,fp8,0,0.3656464099884033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,2,128,1,float16,float16,0,0.4243567943572998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,2,128,1,float16,fp8,0,0.3707583904266357
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,2,128,1,fp8,fp8,0,0.37655200958251955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,4,128,1,float16,float16,0,0.42606558799743655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,4,128,1,float16,fp8,0,0.36899518966674805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,4,128,1,fp8,fp8,0,0.37281599044799807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,8,128,1,float16,float16,0,0.42921600341796873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,8,128,1,float16,fp8,0,0.3744784116744995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,48,128,1,float16,float16,0,0.2594847917556763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,48,8,128,1,float16,fp8,0,1.4828495979309082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,48,128,1,float16,fp8,0,0.21171040534973146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,48,128,1,fp8,fp8,0,0.21049599647521972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,1,128,1,float16,float16,0,0.2167327880859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,1,128,1,float16,fp8,0,0.18823200464248657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,1,128,1,fp8,fp8,0,0.1889232039451599
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,2,128,1,float16,float16,0,0.21828479766845704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,2,128,1,float16,fp8,0,0.19045280218124389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,2,128,1,fp8,fp8,0,0.188919997215271
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,4,128,1,float16,fp8,0,0.18991999626159667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,4,128,1,fp8,fp8,0,0.1912287950515747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,8,128,1,float16,float16,0,0.22070879936218263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,8,128,1,float16,fp8,0,0.1920464038848877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,8,128,1,fp8,fp8,0,0.19378399848937988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,48,128,1,float16,float16,0,0.5338352203369141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,1,128,1,float16,float16,0,0.42481279373168945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,48,8,128,1,fp8,fp8,0,0.37404160499572753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,48,4,128,1,float16,float16,0,0.21847679615020751
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,1,128,1,float16,fp8,0,5.711558532714844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,1,128,1,fp8,fp8,0,5.625068664550781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,1,128,1,float16,float16,0,6.919840240478516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,2,128,1,float16,fp8,0,5.6590831756591795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,2,128,1,fp8,fp8,0,5.6425823211669925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,2,128,1,float16,float16,0,7.037899017333984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,4,128,1,float16,fp8,0,5.723376083374023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,4,128,1,float16,float16,0,7.056841278076172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,48,128,1,float16,fp8,0,3.3925407409667967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,4,128,1,fp8,fp8,0,5.812932968139648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,48,128,1,fp8,fp8,0,3.2855503082275392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,8,128,1,float16,float16,0,7.20899658203125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,8,128,1,float16,fp8,0,5.893313598632813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,48,8,128,1,fp8,fp8,0,5.810006332397461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,1,128,1,float16,float16,0,3.446905517578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,1,128,1,float16,fp8,0,2.8396591186523437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,1,128,1,fp8,fp8,0,2.8597360610961915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,48,128,1,float16,float16,0,4.1243328094482425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,2,128,1,fp8,fp8,0,2.841823959350586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,2,128,1,float16,fp8,0,2.8985776901245117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,4,128,1,float16,fp8,0,2.946232032775879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,2,128,1,float16,float16,0,3.3600448608398437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,4,128,1,float16,float16,0,3.403607940673828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,4,128,1,fp8,fp8,0,2.8521984100341795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,48,128,1,float16,fp8,0,1.646895980834961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,48,128,1,fp8,fp8,0,1.6947343826293946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,48,128,1,float16,float16,0,2.0132272720336912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,8,128,1,float16,fp8,0,2.9095600128173826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,1,128,1,float16,float16,0,1.7086240768432617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,8,128,1,fp8,fp8,0,2.9155696868896483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,1,128,1,float16,fp8,0,1.4160719871520997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,1,128,1,fp8,fp8,0,1.408345603942871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,2,128,1,float16,float16,0,1.6114927291870118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,2,128,1,float16,fp8,0,1.5204655647277832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,2,128,1,fp8,fp8,0,1.4260848045349122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,4,128,1,float16,float16,0,1.7176559448242188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,4,128,1,float16,fp8,0,1.4515952110290526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,4,128,1,fp8,fp8,0,1.421440029144287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,48,128,1,float16,float16,0,0.9934896469116211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,8,128,1,float16,float16,0,1.6862768173217773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,48,128,1,float16,fp8,0,0.8904848098754883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,1,128,1,float16,float16,0,0.8048640251159668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,8,128,1,fp8,fp8,0,1.4583696365356444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,48,8,128,1,float16,fp8,0,1.4848640441894532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,48,8,128,1,float16,float16,0,3.547012710571289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,1,128,1,float16,fp8,0,0.7458975791931153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,1,128,1,fp8,fp8,0,0.7847792148590088
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,2,128,1,float16,float16,0,0.8274767875671387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,2,128,1,float16,fp8,0,0.7643792152404785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,2,128,1,fp8,fp8,0,0.7175631999969483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,4,128,1,float16,float16,0,0.8240752220153809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,4,128,1,float16,fp8,0,0.7371024131774903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,4,128,1,fp8,fp8,0,0.7526720046997071
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,8,128,1,float16,float16,0,0.8414735794067383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,8,128,1,float16,fp8,0,0.752126407623291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,8,128,1,fp8,fp8,0,0.7291776180267334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,48,128,1,float16,float16,0,0.5062560081481934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,48,128,1,float16,fp8,0,0.4244431972503662
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,48,128,1,fp8,fp8,0,0.42954721450805666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,1,128,1,float16,float16,0,0.42211198806762695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,1,128,1,float16,fp8,0,0.36537599563598633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,1,128,1,fp8,fp8,0,0.36006081104278564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,2,128,1,float16,float16,0,0.41730561256408694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,2,128,1,fp8,fp8,0,0.36522560119628905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,4,128,1,float16,fp8,0,0.36723039150238035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,4,128,1,float16,float16,0,0.4303487777709961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,4,128,1,fp8,fp8,0,0.36475839614868166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,8,128,1,float16,float16,0,0.42942399978637696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,8,128,1,float16,fp8,0,0.3724368095397949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,48,128,1,float16,float16,0,0.26067519187927246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,48,128,1,float16,fp8,0,0.21766560077667235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,8,128,1,fp8,fp8,0,0.3687824010848999
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,48,128,1,fp8,fp8,0,0.2220463991165161
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,1,128,1,float16,float16,0,0.20969278812408448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,1,128,1,float16,fp8,0,0.18609280586242677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,2,128,1,float16,fp8,0,0.1907088041305542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,2,128,1,fp8,fp8,0,0.18585439920425414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,4,128,1,float16,fp8,0,0.19089759588241578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,4,128,1,fp8,fp8,0,0.18747520446777344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,8,128,1,float16,float16,0,0.220795202255249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,8,128,1,float16,fp8,0,0.19025919437408448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,8,128,1,fp8,fp8,0,0.1935696005821228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,48,128,1,float16,float16,0,0.136571204662323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,48,128,1,float16,fp8,0,0.11553599834442138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,48,128,1,fp8,fp8,0,0.11527680158615113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,1,128,1,float16,float16,0,0.11043839454650879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,1,128,1,float16,fp8,0,0.09601439833641053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,1,128,1,fp8,fp8,0,0.09547039866447449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,2,128,1,float16,float16,0,0.1107983946800232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,2,128,1,float16,fp8,0,0.09536479711532593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,2,128,1,fp8,fp8,0,0.09632319808006287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,4,128,1,float16,float16,0,0.11198240518569946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,4,128,1,float16,fp8,0,0.09704959988594056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,4,128,1,fp8,fp8,0,0.09630079865455628
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,8,128,1,float16,float16,0,0.11435680389404297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,8,128,1,fp8,fp8,0,0.09869279861450195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,48,2,128,1,float16,fp8,0,0.3639039993286133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,1,128,1,fp8,fp8,0,0.18501280546188353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,48,48,128,1,fp8,fp8,0,0.8311311721801757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,4,128,1,float16,float16,0,0.2134687900543213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,48,8,128,1,float16,fp8,0,0.09775360226631165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,1,128,1,float16,fp8,0,3.4864017486572267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,1,128,1,float16,float16,0,4.013711929321289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,48,2,128,1,float16,float16,0,0.21071360111236573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,1,128,1,fp8,fp8,0,3.5031711578369142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,2,128,1,float16,fp8,0,3.494475173950195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,2,128,1,fp8,fp8,0,3.4908817291259764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,2,128,1,float16,float16,0,4.1287792205810545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,4,128,1,float16,fp8,0,3.5224334716796877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,4,128,1,float16,float16,0,4.246847915649414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,4,128,1,fp8,fp8,0,3.61077766418457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,48,128,1,float16,fp8,0,2.1166208267211912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,48,128,1,float16,float16,0,2.524785614013672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,8,128,1,float16,float16,0,4.270614242553711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,8,128,1,float16,fp8,0,3.5596641540527343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,48,8,128,1,fp8,fp8,0,3.591286468505859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,1,128,1,float16,fp8,0,1.7399631500244142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,48,128,1,fp8,fp8,0,2.0751407623291014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,1,128,1,float16,float16,0,2.0763599395751955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,1,128,1,fp8,fp8,0,1.7383247375488282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,2,128,1,float16,float16,0,2.0253728866577148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,2,128,1,float16,fp8,0,1.8633615493774414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,2,128,1,fp8,fp8,0,1.7410543441772461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,4,128,1,float16,float16,0,1.992265510559082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,4,128,1,float16,fp8,0,1.7630624771118164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,4,128,1,fp8,fp8,0,1.75164794921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,8,128,1,float16,fp8,0,1.791387176513672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,8,128,1,float16,float16,0,2.171311950683594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,48,8,128,1,fp8,fp8,0,1.8714431762695312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,48,128,1,float16,float16,0,1.2771599769592286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,48,128,1,fp8,fp8,0,1.0968223571777345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,1,128,1,fp8,fp8,0,0.8854432106018066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,2,128,1,float16,fp8,0,0.8885871887207031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,2,128,1,float16,float16,0,1.0527376174926757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,2,128,1,fp8,fp8,0,0.8987680435180664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,4,128,1,float16,fp8,0,0.8870143890380859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,4,128,1,float16,float16,0,1.0046159744262695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,4,128,1,fp8,fp8,0,0.8909680366516113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,8,128,1,float16,float16,0,1.0878815650939941
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,8,128,1,float16,fp8,0,0.9054479598999023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,48,128,1,float16,float16,0,0.6580111980438232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,8,128,1,fp8,fp8,0,0.9024815559387207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,48,128,1,float16,fp8,0,0.5334479808807373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,1,128,1,float16,float16,0,0.494868803024292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,48,128,1,fp8,fp8,0,0.5623775959014893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,1,128,1,float16,fp8,0,0.46959681510925294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,1,128,1,fp8,fp8,0,0.44313597679138184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,2,128,1,float16,float16,0,0.4977680206298828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,2,128,1,float16,fp8,0,0.472057580947876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,2,128,1,fp8,fp8,0,0.46136322021484377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,4,128,1,float16,float16,0,0.5046319961547852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,48,128,1,float16,fp8,0,1.0444272041320801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,1,128,1,float16,fp8,0,0.8714192390441895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,4,128,1,float16,fp8,0,0.45551362037658694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,4,128,1,fp8,fp8,0,0.46018080711364745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,8,128,1,float16,float16,0,0.5177519798278809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,8,128,1,float16,fp8,0,0.45720958709716797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,48,8,128,1,fp8,fp8,0,0.4565728187561035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,48,128,1,float16,float16,0,0.32327680587768554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,48,128,1,float16,fp8,0,0.27487359046936033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,1,128,1,float16,float16,0,0.25430400371551515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,1,128,1,fp8,fp8,0,0.22671520709991455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,2,128,1,float16,float16,0,0.2541599988937378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,2,128,1,float16,fp8,0,0.23085439205169678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,2,128,1,fp8,fp8,0,0.23004798889160155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,4,128,1,float16,float16,0,0.25728158950805663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,4,128,1,float16,fp8,0,0.23134241104125977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,4,128,1,fp8,fp8,0,0.22994720935821533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,8,128,1,float16,float16,0,0.2648080110549927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,8,128,1,float16,fp8,0,0.23419361114501952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,48,128,1,float16,float16,0,0.17073440551757812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,8,128,1,fp8,fp8,0,0.23551199436187745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,48,128,1,float16,fp8,0,0.14360159635543823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,48,128,1,fp8,fp8,0,0.14323519468307494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,1,128,1,float16,fp8,0,0.11836960315704345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,2,128,1,float16,float16,0,0.1332479953765869
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,2,128,1,float16,fp8,0,0.11924480199813843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,2,128,1,fp8,fp8,0,0.11923199892044067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,4,128,1,float16,float16,0,0.1345728039741516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,4,128,1,float16,fp8,0,0.12059040069580078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,4,128,1,fp8,fp8,0,0.12007360458374024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,8,128,1,float16,float16,0,0.1379040002822876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,8,128,1,float16,fp8,0,0.12215520143508911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,8,128,1,fp8,fp8,0,0.12200800180435181
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,48,128,1,float16,float16,0,0.0900223970413208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,48,128,1,float16,fp8,0,0.07814880013465882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,48,128,1,fp8,fp8,0,0.07835999727249146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,1,128,1,float16,float16,0,0.06901119947433472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,1,128,1,float16,fp8,0,0.06305760145187378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,48,1,128,1,float16,float16,0,0.9812416076660156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,2,128,1,float16,float16,0,0.06962559819221496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,2,128,1,float16,fp8,0,0.06375359892845153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,2,128,1,fp8,fp8,0,0.06354079842567444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,4,128,1,float16,float16,0,0.07213600277900696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,4,128,1,float16,fp8,0,0.06351040005683899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,4,128,1,fp8,fp8,0,0.06438239812850952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,48,128,1,fp8,fp8,0,0.2753391981124878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,8,128,1,float16,fp8,0,0.06389120221138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,8,128,1,fp8,fp8,0,0.06466240286827088
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,1,128,1,fp8,fp8,0,0.11965759992599487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,1,128,1,fp8,fp8,0,0.06344159841537475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,48,8,128,1,float16,float16,0,0.07517439723014832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,48,1,128,1,float16,fp8,0,0.22722239494323732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,1,128,1,float16,fp8,0,3.640087890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,1,128,1,fp8,fp8,0,3.68372802734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,1,128,1,float16,float16,0,4.20080337524414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,48,1,128,1,float16,float16,0,0.13299360275268554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,2,128,1,float16,float16,0,4.184675216674805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,2,128,1,float16,fp8,0,3.669510269165039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,2,128,1,fp8,fp8,0,3.657523345947266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,4,128,1,float16,fp8,0,3.6894622802734376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,4,128,1,fp8,fp8,0,3.7013198852539064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,4,128,1,float16,float16,0,4.238124847412109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,8,128,1,float16,fp8,0,3.778071975708008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,8,128,1,float16,float16,0,4.526105499267578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,48,8,128,1,fp8,fp8,0,3.876460647583008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,48,128,1,float16,float16,0,2.7673295974731444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,1,128,1,float16,float16,0,2.0261775970458986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,48,128,1,float16,fp8,0,2.286014366149902
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,1,128,1,float16,fp8,0,1.9224256515502929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,48,128,1,fp8,fp8,0,2.286191940307617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,1,128,1,fp8,fp8,0,1.8178543090820312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,2,128,1,float16,float16,0,2.0877311706542967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,2,128,1,float16,fp8,0,1.849135971069336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,2,128,1,fp8,fp8,0,1.8794384002685547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,4,128,1,float16,fp8,0,1.8508495330810546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,4,128,1,fp8,fp8,0,1.8617071151733398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,4,128,1,float16,float16,0,2.1108240127563476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,8,128,1,float16,float16,0,2.1461103439331053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,8,128,1,float16,fp8,0,1.8785600662231445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,1,128,1,float16,float16,0,1.0046624183654784
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,48,128,1,float16,fp8,0,1.154800033569336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,48,8,128,1,fp8,fp8,0,1.9642208099365235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,48,128,1,float16,float16,0,1.371878433227539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,1,128,1,float16,fp8,0,0.9622143745422364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,1,128,1,fp8,fp8,0,0.9602160453796387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,2,128,1,float16,float16,0,1.0047679901123048
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,2,128,1,float16,fp8,0,0.9257040023803711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,2,128,1,fp8,fp8,0,0.9269231796264649
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,4,128,1,float16,float16,0,1.0257216453552247
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,4,128,1,fp8,fp8,0,0.9547136306762696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,4,128,1,float16,fp8,0,0.9373359680175781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,8,128,1,float16,float16,0,1.0717439651489258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,8,128,1,float16,fp8,0,0.9663104057312012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,48,128,1,float16,float16,0,0.6921023845672607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,48,128,1,fp8,fp8,0,0.5875072002410888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,8,128,1,fp8,fp8,0,0.9541423797607422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,1,128,1,float16,fp8,0,0.46649918556213377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,2,128,1,float16,fp8,0,0.4667840003967285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,2,128,1,fp8,fp8,0,0.46831040382385253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,4,128,1,float16,float16,0,0.5175424098968506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,48,48,128,1,fp8,fp8,0,1.142630386352539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,4,128,1,float16,fp8,0,0.4733583927154541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,4,128,1,fp8,fp8,0,0.4715263843536377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,8,128,1,float16,fp8,0,0.48386240005493164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,8,128,1,float16,float16,0,0.5315296173095703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,8,128,1,fp8,fp8,0,0.4827424049377441
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,48,128,1,fp8,fp8,0,0.29765279293060304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,48,128,1,float16,fp8,0,0.2965264081954956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,48,128,1,float16,float16,0,0.3565407991409302
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,1,128,1,float16,float16,0,0.25974559783935547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,48,128,1,float16,fp8,0,0.6102128028869629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,1,128,1,float16,float16,0,0.5089871883392334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,1,128,1,fp8,fp8,0,0.23903679847717285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,2,128,1,float16,float16,0,0.2614912033081055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,2,128,1,fp8,fp8,0,0.2374799966812134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,4,128,1,float16,float16,0,0.26629760265350344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,4,128,1,float16,fp8,0,0.2410288095474243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,1,128,1,fp8,fp8,0,0.4670815944671631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,4,128,1,fp8,fp8,0,0.2399967908859253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,8,128,1,float16,float16,0,0.27388639450073243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,8,128,1,float16,fp8,0,0.24689600467681885
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,48,128,1,float16,float16,0,0.18385920524597169
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,48,128,1,float16,fp8,0,0.1556671977043152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,48,128,1,fp8,fp8,0,0.15513279438018798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,1,128,1,float16,float16,0,0.13496320247650145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,1,128,1,float16,fp8,0,0.12444800138473511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,1,128,1,fp8,fp8,0,0.12369120121002197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,2,128,1,float16,float16,0,0.13597919940948486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,2,128,1,float16,fp8,0,0.12563040256500244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,2,128,1,fp8,fp8,0,0.12461919784545898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,4,128,1,float16,float16,0,0.13766399621963502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,4,128,1,float16,fp8,0,0.12622560262680055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,4,128,1,fp8,fp8,0,0.1254688024520874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,8,128,1,float16,float16,0,0.14211679697036744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,8,128,1,float16,fp8,0,0.1290112018585205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,48,8,128,1,fp8,fp8,0,0.12853920459747314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,48,128,1,float16,fp8,0,0.08353760242462158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,48,128,1,fp8,fp8,0,0.0836239993572235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,1,128,1,float16,float16,0,0.07348960041999816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,1,128,1,float16,fp8,0,0.0642192006111145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,1,128,1,fp8,fp8,0,0.06470879912376404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,2,128,1,float16,float16,0,0.07393280267715455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,2,128,1,float16,fp8,0,0.06522560119628906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,2,128,1,fp8,fp8,0,0.06480479836463929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,4,128,1,float16,float16,0,0.07507519721984864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,4,128,1,float16,fp8,0,0.06564480066299438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,4,128,1,fp8,fp8,0,0.06592320203781128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,8,128,1,float16,float16,0,0.0769599974155426
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,8,128,1,float16,fp8,0,0.06834400296211243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,1,128,1,float16,fp8,0,0.2364272117614746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,48,128,1,float16,float16,0,0.057576000690460205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,48,128,1,float16,fp8,0,0.04806399941444397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,48,128,1,fp8,fp8,0,0.04713279902935028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,1,128,1,float16,float16,0,0.03939520120620728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,1,128,1,float16,fp8,0,0.035622400045394895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,1,128,1,fp8,fp8,0,0.03603520095348358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,2,128,1,float16,float16,0,0.03914079964160919
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,2,128,1,float16,fp8,0,0.23863840103149414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,2,128,1,float16,fp8,0,0.03614560067653656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,2,128,1,fp8,fp8,0,0.03568960130214691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,48,2,128,1,float16,float16,0,0.5128464221954345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,4,128,1,float16,float16,0,0.039796799421310425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,4,128,1,fp8,fp8,0,0.0358240008354187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,8,128,1,float16,fp8,0,0.03674719929695129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,8,128,1,fp8,fp8,0,0.036236798763275145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,48,8,128,1,fp8,fp8,0,0.24801919460296631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,48,128,1,float16,float16,0,0.09832159876823425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,48,8,128,1,fp8,fp8,0,0.06789119839668274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,1,128,1,float16,fp8,0,2.6615472793579102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,1,128,1,float16,float16,0,2.7985376358032226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,1,128,1,fp8,fp8,0,2.6785472869873046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,4,128,1,float16,fp8,0,0.035953599214553836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,48,8,128,1,float16,float16,0,0.040540799498558044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,2,128,1,float16,float16,0,2.815510368347168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,2,128,1,float16,fp8,0,2.670848083496094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,2,128,1,fp8,fp8,0,2.665979194641113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,4,128,1,float16,float16,0,2.8933904647827147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,4,128,1,float16,fp8,0,2.7208703994750976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,4,128,1,fp8,fp8,0,2.7160816192626953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,48,128,1,float16,float16,0,2.1424543380737306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,8,128,1,float16,float16,0,3.0202512741088867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,8,128,1,float16,fp8,0,2.8123023986816404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,48,8,128,1,fp8,fp8,0,2.8207376480102537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,48,128,1,float16,fp8,0,1.8435583114624023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,1,128,1,float16,float16,0,1.3868592262268067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,1,128,1,float16,fp8,0,1.3474672317504883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,48,128,1,fp8,fp8,0,1.8672592163085937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,1,128,1,fp8,fp8,0,1.3627872467041016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,2,128,1,float16,fp8,0,1.3508079528808594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,2,128,1,fp8,fp8,0,1.3517871856689454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,2,128,1,float16,float16,0,1.451159954071045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,4,128,1,float16,float16,0,1.4302991867065429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,4,128,1,float16,fp8,0,1.3799615859985352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,4,128,1,fp8,fp8,0,1.3650143623352051
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,48,128,1,float16,fp8,0,0.9391471862792968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,8,128,1,float16,float16,0,1.5006175994873048
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,48,128,1,float16,float16,0,1.0844655990600587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,8,128,1,float16,fp8,0,1.4172047615051269
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,48,8,128,1,fp8,fp8,0,1.4132495880126954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,1,128,1,float16,float16,0,0.7045775890350342
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,48,128,1,fp8,fp8,0,0.9303824424743652
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,1,128,1,float16,fp8,0,0.679641580581665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,2,128,1,float16,float16,0,0.7103231906890869
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,2,128,1,float16,fp8,0,0.6843376159667969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,2,128,1,fp8,fp8,0,0.6832816123962402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,4,128,1,float16,float16,0,0.7234655857086182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,4,128,1,float16,fp8,0,0.6868175983428955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,4,128,1,fp8,fp8,0,0.6890207767486572
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,8,128,1,float16,float16,0,0.7573215961456299
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,48,128,1,float16,fp8,0,0.47179198265075684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,48,128,1,float16,float16,0,0.5486159801483155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,8,128,1,float16,fp8,0,0.7156000137329102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,48,128,1,fp8,fp8,0,0.47339839935302735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,1,128,1,float16,float16,0,0.3590559959411621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,1,128,1,fp8,fp8,0,0.3449408054351807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,2,128,1,float16,float16,0,0.36337919235229493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,2,128,1,float16,fp8,0,0.3470655918121338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,2,128,1,fp8,fp8,0,0.3473088026046753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,4,128,1,float16,float16,0,0.3687632083892822
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,4,128,1,float16,fp8,0,0.35221760272979735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,4,128,1,fp8,fp8,0,0.35240960121154785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,8,128,1,float16,float16,0,0.38528640270233155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,8,128,1,float16,fp8,0,0.3618096113204956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,8,128,1,fp8,fp8,0,0.36418719291687013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,48,128,1,float16,float16,0,0.28247039318084716
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,48,128,1,float16,fp8,0,0.24324638843536378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,1,128,1,float16,float16,0,0.18684639930725097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,1,128,1,float16,fp8,0,0.17923519611358643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,1,128,1,fp8,fp8,0,0.17896159887313842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,2,128,1,float16,float16,0,0.18855520486831664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,2,128,1,float16,fp8,0,0.18023840188980103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,2,128,1,fp8,fp8,0,0.17982399463653564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,1,128,1,fp8,fp8,0,0.6916783809661865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,4,128,1,float16,float16,0,0.19213759899139404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,4,128,1,fp8,fp8,0,0.18191039562225342
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,8,128,1,float16,fp8,0,0.18875360488891602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,8,128,1,fp8,fp8,0,0.18721599578857423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,48,128,1,float16,float16,0,0.1488767981529236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,48,128,1,float16,fp8,0,0.12700159549713136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,48,128,1,fp8,fp8,0,0.1281872034072876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,1,128,1,float16,float16,0,0.09931520223617554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,48,8,128,1,fp8,fp8,0,0.7149824142456055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,1,128,1,float16,fp8,0,0.09560800194740296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,1,128,1,fp8,fp8,0,0.09521600008010864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,48,1,128,1,float16,fp8,0,0.3446415901184082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,2,128,1,float16,fp8,0,0.09609599709510804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,2,128,1,float16,float16,0,0.10057120323181153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,2,128,1,fp8,fp8,0,0.09535840153694153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,4,128,1,float16,fp8,0,0.09719679951667785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,4,128,1,fp8,fp8,0,0.0967024028301239
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,8,128,1,float16,float16,0,0.10703840255737304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,8,128,1,float16,fp8,0,0.10044480562210083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,8,128,1,fp8,fp8,0,0.09961919784545899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,48,128,1,float16,float16,0,0.08104159832000732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,48,128,1,float16,fp8,0,0.07063199877738953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,48,128,1,fp8,fp8,0,0.06956959962844848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,1,128,1,float16,fp8,0,0.05125120282173157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,1,128,1,fp8,fp8,0,0.05100319981575012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,2,128,1,float16,float16,0,0.0566976010799408
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,2,128,1,float16,fp8,0,0.05115519762039185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,2,128,1,fp8,fp8,0,0.05137919783592224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,4,128,1,float16,float16,0,0.05720800161361694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,4,128,1,float16,fp8,0,0.05230879783630371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,4,128,1,fp8,fp8,0,0.05214080214500427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,8,128,1,float16,float16,0,0.05959839820861816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,8,128,1,float16,fp8,0,0.05425440073013306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,48,128,1,fp8,fp8,0,0.24142560958862305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,8,128,1,fp8,fp8,0,0.05501919984817505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,48,128,1,float16,fp8,0,0.039124798774719236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,48,128,1,fp8,fp8,0,0.03917120099067688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,1,128,1,float16,fp8,0,0.03049759864807129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,2,128,1,float16,float16,0,0.03105599880218506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,4,128,1,float16,fp8,0,0.18156640529632567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,2,128,1,float16,fp8,0,0.030527999997138976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,2,128,1,fp8,fp8,0,0.03057439923286438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,4,128,1,float16,float16,0,0.03153760135173798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,4,128,1,float16,fp8,0,0.03059999942779541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,4,128,1,fp8,fp8,0,0.030711999535560607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,8,128,1,float16,float16,0,0.03213599920272827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,8,128,1,float16,fp8,0,0.03125280141830444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,48,128,1,float16,float16,0,0.02677600085735321
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,48,128,1,float16,fp8,0,0.024260799586772918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,48,128,1,fp8,fp8,0,0.024222399294376373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,1,128,1,float16,float16,0,0.02033279985189438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,1,128,1,float16,fp8,0,0.0195360004901886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,1,128,1,fp8,fp8,0,0.019457599520683287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,2,128,1,float16,float16,0,0.020448000729084016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,2,128,1,float16,fp8,0,0.019308799505233766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,2,128,1,fp8,fp8,0,0.01945119947195053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,4,128,1,float16,float16,0,0.020608000457286835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,4,128,1,float16,fp8,0,0.019431999325752257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,4,128,1,fp8,fp8,0,0.019388799369335175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,48,1,128,1,float16,float16,0,0.05627520084381103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,8,128,1,float16,float16,0,0.02099999934434891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,8,128,1,float16,fp8,0,0.021323199570178985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,48,8,128,1,fp8,fp8,0,0.02122880071401596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,48,128,1,float16,float16,0,0.04648320078849792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,1,128,1,fp8,fp8,0,0.030635198950767516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,1,128,1,float16,float16,0,1.1010592460632325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,48,8,128,1,float16,float16,0,0.19917600154876708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,8,128,1,fp8,fp8,0,0.03107840120792389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,1,128,1,float16,fp8,0,1.1174240112304688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,1,128,1,fp8,fp8,0,1.1185680389404298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,2,128,1,float16,float16,0,1.1136048316955567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,48,4,128,1,float16,float16,0,0.10237280130386353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,2,128,1,float16,fp8,0,1.1204079627990722
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,2,128,1,fp8,fp8,0,1.1166192054748536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,4,128,1,float16,float16,0,1.1360671997070313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,4,128,1,float16,fp8,0,1.1350607872009277
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,4,128,1,fp8,fp8,0,1.1409152030944825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,8,128,1,float16,float16,0,1.199556827545166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,8,128,1,float16,fp8,0,1.1754783630371093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,48,8,128,1,fp8,fp8,0,1.1703840255737306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,48,1,128,1,float16,float16,0,0.03134559988975525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,48,128,1,float16,float16,0,0.9484127998352051
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,1,128,1,float16,float16,0,0.5539840221405029
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,1,128,1,float16,fp8,0,0.5659023761749268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,48,128,1,fp8,fp8,0,0.7892784118652344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,1,128,1,fp8,fp8,0,0.5623280048370362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,2,128,1,float16,fp8,0,0.5673759937286377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,2,128,1,fp8,fp8,0,0.5667376041412353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,4,128,1,float16,float16,0,0.5758768081665039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,4,128,1,fp8,fp8,0,0.5720880031585693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,4,128,1,float16,fp8,0,0.5776031970977783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,8,128,1,float16,fp8,0,0.5950575828552246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,48,128,1,float16,fp8,0,0.40200161933898926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,48,128,1,fp8,fp8,0,0.399620795249939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,48,128,1,float16,float16,0,0.48195681571960447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,1,128,1,float16,float16,0,0.28601119518280027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,1,128,1,float16,fp8,0,0.28874399662017824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,1,128,1,fp8,fp8,0,0.2881983995437622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,2,128,1,float16,float16,0,0.2871311902999878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,2,128,1,float16,fp8,0,0.2918560028076172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,2,128,1,fp8,fp8,0,0.2885871887207031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,4,128,1,float16,float16,0,0.29416959285736083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,4,128,1,float16,fp8,0,0.29541280269622805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,4,128,1,fp8,fp8,0,0.29284958839416503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,48,128,1,float16,fp8,0,0.7865759849548339
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,8,128,1,float16,float16,0,0.30980160236358645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,2,128,1,float16,float16,0,0.5566512107849121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,8,128,1,float16,fp8,0,0.30617759227752683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,48,8,128,1,fp8,fp8,0,0.30326879024505615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,48,128,1,fp8,fp8,0,0.20691521167755128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,1,128,1,float16,float16,0,0.14699519872665406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,1,128,1,fp8,fp8,0,0.15042400360107422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,2,128,1,float16,float16,0,0.14983680248260497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,2,128,1,float16,fp8,0,0.1504863977432251
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,2,128,1,fp8,fp8,0,0.15089600086212157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,4,128,1,float16,fp8,0,0.15168160200119019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,8,128,1,float16,float16,0,0.16073280572891235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,1,128,1,float16,fp8,0,0.15118399858474732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,4,128,1,float16,float16,0,0.1542847990989685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,4,128,1,fp8,fp8,0,0.1527343988418579
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,48,128,1,float16,float16,0,0.13123999834060668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,8,128,1,fp8,fp8,0,0.15963040590286254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,48,128,1,float16,fp8,0,0.10950720310211182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,48,128,1,fp8,fp8,0,0.10985759496688843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,1,128,1,float16,float16,0,0.08084319829940796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,8,128,1,fp8,fp8,0,0.595959997177124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,2,128,1,float16,float16,0,0.08182560205459595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,2,128,1,float16,fp8,0,0.08118240237236023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,2,128,1,fp8,fp8,0,0.08192960023880005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,4,128,1,float16,float16,0,0.08286719918251037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,4,128,1,float16,fp8,0,0.08296480178833007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,4,128,1,fp8,fp8,0,0.08239359855651855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,8,128,1,float16,float16,0,0.08808640241622925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,8,128,1,float16,fp8,0,0.08526239991188049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,8,128,1,fp8,fp8,0,0.08560960292816162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,48,128,1,float16,float16,0,0.07338560223579407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,48,128,1,float16,fp8,0,0.061680001020431516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,48,128,1,fp8,fp8,0,0.06189280152320862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,1,128,1,float16,float16,0,0.04626719951629639
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,1,128,1,float16,fp8,0,0.043278399109840396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,1,128,1,fp8,fp8,0,0.04318560063838959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,2,128,1,float16,float16,0,0.04654400050640106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,2,128,1,float16,fp8,0,0.04374879896640778
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,2,128,1,fp8,fp8,0,0.043772798776626584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,4,128,1,float16,fp8,0,0.044758400321006774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,4,128,1,float16,float16,0,0.047958400845527646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,48,128,1,float16,fp8,0,0.20638399124145507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,8,128,1,float16,float16,0,0.04946720004081726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,8,128,1,float16,fp8,0,0.047203201055526736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,8,128,1,fp8,fp8,0,0.047152000665664676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,48,128,1,float16,fp8,0,0.03499679863452911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,48,128,1,fp8,fp8,0,0.03490239977836609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,48,8,128,1,float16,float16,0,0.6071184158325196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,1,128,1,float16,float16,0,0.024961599707603456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,1,128,1,fp8,fp8,0,0.025923201441764833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,1,128,1,float16,fp8,0,0.025931200385093688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,1,128,1,float16,fp8,0,0.0809391975402832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,2,128,1,fp8,fp8,0,0.025913599133491515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,4,128,1,float16,float16,0,0.025257599353790284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,48,128,1,float16,float16,0,0.24930400848388673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,4,128,1,float16,fp8,0,0.026163199543952943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,48,8,128,1,float16,fp8,0,0.15771839618682862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,4,128,1,fp8,fp8,0,0.02621760070323944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,8,128,1,float16,float16,0,0.025772801041603087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,48,128,1,float16,float16,0,0.0416128009557724
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,8,128,1,float16,fp8,0,0.026577600836753847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,48,128,1,float16,float16,0,0.0232464000582695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,48,128,1,float16,fp8,0,0.021505600214004515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,48,128,1,fp8,fp8,0,0.02115679979324341
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,1,128,1,float16,float16,0,0.01860480010509491
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,1,128,1,float16,fp8,0,0.017499199509620665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,1,128,1,fp8,fp8,0,0.017825600504875184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,2,128,1,float16,float16,0,0.018478399515151976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,2,128,1,float16,fp8,0,0.017759999632835387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,2,128,1,fp8,fp8,0,0.01780160069465637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,4,128,1,float16,float16,0,0.018697600066661834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,2,128,1,float16,float16,0,0.025091201066970825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,4,128,1,float16,fp8,0,0.017739200592041017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,4,128,1,fp8,fp8,0,0.0177824005484581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,8,128,1,float16,fp8,0,0.01918720006942749
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,8,128,1,float16,float16,0,0.019204799830913544
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,48,128,1,float16,float16,0,0.01587519943714142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,48,128,1,fp8,fp8,0,0.015358400344848634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,1,128,1,float16,float16,0,0.013865600526332855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,1,128,1,float16,fp8,0,0.014230400323867798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,1,128,1,fp8,fp8,0,0.014182400703430176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,2,128,1,float16,float16,0,0.013916799426078796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,2,128,1,float16,fp8,0,0.014180800318717957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,2,128,1,fp8,fp8,0,0.014023999869823455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,4,128,1,float16,float16,0,0.013865600526332855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,4,128,1,float16,fp8,0,0.014004799723625182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,4,128,1,fp8,fp8,0,0.01406719982624054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,8,128,1,float16,float16,0,0.014083200693130493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,8,128,1,float16,fp8,0,0.01443839967250824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,8,128,1,fp8,fp8,0,0.014446400105953217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,48,4,128,1,fp8,fp8,0,0.045044800639152525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,1,128,1,float16,float16,0,0.5193151950836181
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,8,128,1,fp8,fp8,0,0.026390400528907777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,1,128,1,fp8,fp8,0,0.5431295871734619
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,2,128,1,float16,float16,0,0.5181424140930175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,48,1,128,1,fp8,fp8,0,0.08100640177726745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,48,8,128,1,fp8,fp8,0,0.019684800505638124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,2,128,1,float16,fp8,0,0.5446847915649414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,48,48,128,1,float16,fp8,0,0.015559999644756317
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,2,128,1,fp8,fp8,0,0.5434656143188477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,4,128,1,float16,float16,0,0.536243200302124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,4,128,1,float16,fp8,0,0.5581567764282227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,4,128,1,fp8,fp8,0,0.5574495792388916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,8,128,1,float16,float16,0,0.568120002746582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,8,128,1,float16,fp8,0,0.5775728225708008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,8,128,1,fp8,fp8,0,0.5798416137695312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,48,128,1,float16,float16,0,0.4590047836303711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,1,128,1,float16,float16,0,0.262172794342041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,48,128,1,fp8,fp8,0,0.40145277976989746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,1,128,1,fp8,fp8,0,0.27899680137634275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,2,128,1,float16,float16,0,0.265993595123291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,2,128,1,float16,fp8,0,0.27928481101989744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,2,128,1,fp8,fp8,0,0.2796544075012207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,4,128,1,float16,float16,0,0.27301599979400637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,4,128,1,fp8,fp8,0,0.2857599973678589
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,48,1,128,1,float16,fp8,0,0.5451536178588867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,8,128,1,float16,float16,0,0.2909631967544556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,8,128,1,float16,fp8,0,0.29509921073913575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,48,128,1,float16,fp8,0,0.20767040252685548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,8,128,1,fp8,fp8,0,0.29768478870391846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,48,128,1,float16,float16,0,0.23978080749511718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,48,2,128,1,float16,fp8,0,0.025915199518203737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,1,128,1,float16,float16,0,0.13930560350418092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,1,128,1,float16,fp8,0,0.14467999935150147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,1,128,1,fp8,fp8,0,0.14500319957733154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,2,128,1,float16,fp8,0,0.14697920083999633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,2,128,1,float16,float16,0,0.14082560539245606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,2,128,1,fp8,fp8,0,0.14559839963912963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,4,128,1,float16,float16,0,0.1456112027168274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,4,128,1,float16,fp8,0,0.15036799907684326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,8,128,1,float16,float16,0,0.15243200063705445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,8,128,1,float16,fp8,0,0.1565343976020813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,8,128,1,fp8,fp8,0,0.1565551996231079
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,48,128,1,float16,float16,0,0.12694720029830933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,48,128,1,float16,fp8,0,0.10876799821853637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,48,128,1,fp8,fp8,0,0.10867680311203003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,1,128,1,float16,float16,0,0.07687680125236511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,1,128,1,fp8,fp8,0,0.07807520031929016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,2,128,1,float16,float16,0,0.07813760042190551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,2,128,1,float16,fp8,0,0.07866560220718384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,48,128,1,float16,fp8,0,0.40099358558654785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,4,128,1,float16,float16,0,0.0797599971294403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,4,128,1,float16,fp8,0,0.07950400114059449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,4,128,1,fp8,fp8,0,0.07989439964294434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,8,128,1,float16,float16,0,0.08260480165481568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,8,128,1,float16,fp8,0,0.0824015974998474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,8,128,1,fp8,fp8,0,0.08103359937667846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,48,128,1,float16,float16,0,0.06982560157775879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,48,128,1,float16,fp8,0,0.05791040062904358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,48,128,1,fp8,fp8,0,0.05815520286560059
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,1,128,1,float16,float16,0,0.042735999822616576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,1,128,1,float16,fp8,0,0.03888800144195557
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,1,128,1,fp8,fp8,0,0.03909760117530823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,4,128,1,float16,fp8,0,0.2858112096786499
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,2,128,1,float16,fp8,0,0.039392000436782836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,2,128,1,fp8,fp8,0,0.039236798882484436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,4,128,1,float16,float16,0,0.044353601336479184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,4,128,1,float16,fp8,0,0.040214401483535764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,48,128,1,fp8,fp8,0,0.20678880214691162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,4,128,1,fp8,fp8,0,0.04025599956512451
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,8,128,1,float16,float16,0,0.046351999044418335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,8,128,1,float16,fp8,0,0.043017598986625674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,8,128,1,fp8,fp8,0,0.04321120083332062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,48,128,1,float16,float16,0,0.03951840102672577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,48,128,1,float16,fp8,0,0.02979680001735687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,48,128,1,fp8,fp8,0,0.029980799555778502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,1,128,1,float16,fp8,0,0.022844800353050233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,1,128,1,fp8,fp8,0,0.022867199778556824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,48,4,128,1,fp8,fp8,0,0.14827359914779664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,2,128,1,float16,float16,0,0.022387200593948366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,2,128,1,float16,fp8,0,0.022924800217151643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,2,128,1,fp8,fp8,0,0.0228752002120018
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,4,128,1,float16,float16,0,0.022699199616909027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,4,128,1,float16,fp8,0,0.0229312002658844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,4,128,1,fp8,fp8,0,0.02300799936056137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,8,128,1,float16,float16,0,0.02329760044813156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,1,128,1,float16,fp8,0,0.07817599773406983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,8,128,1,fp8,fp8,0,0.023217600584030152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,48,128,1,float16,float16,0,0.022465600073337554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,48,128,1,float16,fp8,0,0.019049599766731262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,48,128,1,fp8,fp8,0,0.01912959963083267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,1,128,1,float16,float16,0,0.01706880033016205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,48,2,128,1,fp8,fp8,0,0.0787343978881836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,1,128,1,fp8,fp8,0,0.01757120043039322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,2,128,1,float16,fp8,0,0.017528000473976135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,2,128,1,float16,float16,0,0.017262400686740877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,48,1,128,1,float16,fp8,0,0.2781519889831543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,2,128,1,fp8,fp8,0,0.017508800327777862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,4,128,1,float16,float16,0,0.0173567995429039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,4,128,1,float16,fp8,0,0.017561599612236023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,4,128,1,fp8,fp8,0,0.017731200158596038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,8,128,1,float16,float16,0,0.01746560037136078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,8,128,1,float16,fp8,0,0.017931200563907623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,8,128,1,fp8,fp8,0,0.017836800217628478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,48,128,1,float16,float16,0,0.01446399986743927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,48,128,1,fp8,fp8,0,0.013643200695514678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,48,128,1,float16,fp8,0,0.013575999438762665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,1,128,1,float16,fp8,0,0.012744000554084778
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,1,128,1,fp8,fp8,0,0.01268640011548996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,2,128,1,float16,float16,0,0.012244799733161926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,2,128,1,float16,fp8,0,0.012812800705432892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,4,128,1,float16,float16,0,0.012460800260305405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,4,128,1,float16,fp8,0,0.012707200646400452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,4,128,1,fp8,fp8,0,0.01289760023355484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,8,128,1,float16,float16,0,0.012836800515651703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,8,128,1,float16,fp8,0,0.01311040073633194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,8,128,1,fp8,fp8,0,0.012774400413036346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,48,128,1,float16,float16,0,0.013763199746608733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,1,128,1,float16,float16,0,0.022023999691009523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,48,128,1,float16,fp8,0,0.013084800541400909
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,48,128,1,fp8,fp8,0,0.013017599284648896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,1,128,1,float16,float16,0,0.012107200175523757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,1,128,1,float16,fp8,0,0.012251199781894683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,1,128,1,fp8,fp8,0,0.01239359974861145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,2,128,1,float16,float16,0,0.011932799965143204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,2,128,1,float16,fp8,0,0.012601600587368011
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,2,128,1,fp8,fp8,0,0.012374400347471236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,4,128,1,float16,float16,0,0.012281599640846252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,4,128,1,fp8,fp8,0,0.012612800300121307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,48,8,128,1,float16,fp8,0,0.02306720018386841
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,8,128,1,float16,float16,0,0.0124208003282547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,8,128,1,float16,fp8,0,0.012511999905109405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,8,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,48,1,128,1,float16,fp8,0,0.017662400007247926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,1,128,1,float16,float16,0,0.2660111904144287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,1,128,1,float16,fp8,0,0.27780001163482665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,1,128,1,fp8,fp8,0,0.27809278964996337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,2,128,1,float16,float16,0,0.2681360006332397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,2,128,1,float16,fp8,0,0.27820799350738523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,2,128,1,fp8,fp8,0,0.2795952081680298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,1,128,1,float16,float16,0,0.012280000001192093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,4,128,1,fp8,fp8,0,0.2841648101806641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,48,2,128,1,float16,float16,0,0.043510401248931886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,8,128,1,float16,float16,0,0.29155519008636477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,8,128,1,fp8,fp8,0,0.2945791959762573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,48,128,1,float16,float16,0,0.27723839282989504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,48,128,1,fp8,fp8,0,0.22403841018676757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,1,128,1,float16,float16,0,0.140067195892334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,1,128,1,float16,fp8,0,0.14390560388565063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,1,128,1,fp8,fp8,0,0.1444416046142578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,2,128,1,float16,float16,0,0.14217280149459838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,2,128,1,float16,fp8,0,0.1469391942024231
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,2,128,1,fp8,fp8,0,0.14706720113754274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,4,128,1,float16,float16,0,0.14565279483795165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,4,128,1,float16,fp8,0,0.1494928002357483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,48,4,128,1,float16,fp8,0,0.012457600235939026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,4,128,1,fp8,fp8,0,0.15042719841003419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,8,128,1,float16,float16,0,0.15339200496673583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,8,128,1,float16,fp8,0,0.1541551947593689
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,8,128,1,fp8,fp8,0,0.1560960054397583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,48,128,1,float16,float16,0,0.1445456027984619
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,48,128,1,fp8,fp8,0,0.11884000301361083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,1,128,1,float16,float16,0,0.0777072012424469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,1,128,1,float16,fp8,0,0.0781503975391388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,1,128,1,fp8,fp8,0,0.07817919850349427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,2,128,1,float16,float16,0,0.0778223991394043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,2,128,1,float16,fp8,0,0.0782368004322052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,4,128,1,float16,float16,0,0.2775568008422852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,4,128,1,float16,fp8,0,0.2841984033584595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,4,128,1,float16,float16,0,0.08055520057678223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,48,2,128,1,fp8,fp8,0,0.01271200031042099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,4,128,1,float16,fp8,0,0.07980160117149353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,4,128,1,fp8,fp8,0,0.08023679852485657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,48,8,128,1,float16,fp8,0,0.2951488018035889
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,8,128,1,float16,float16,0,0.08377439975738525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,8,128,1,fp8,fp8,0,0.08256000280380249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,48,128,1,float16,float16,0,0.07965440154075623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,48,128,1,float16,fp8,0,0.06179680228233338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,48,128,1,fp8,fp8,0,0.06252319812774658
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,48,48,128,1,float16,fp8,0,0.22618720531463624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,1,128,1,float16,float16,0,0.04327360093593598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,1,128,1,fp8,fp8,0,0.03947519958019256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,2,128,1,float16,fp8,0,0.040320000052452086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,2,128,1,fp8,fp8,0,0.04013440012931824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,4,128,1,float16,float16,0,0.04467839896678925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,4,128,1,float16,fp8,0,0.04130719900131226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,4,128,1,fp8,fp8,0,0.04118559956550598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,8,128,1,float16,float16,0,0.04748960137367249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,8,128,1,float16,fp8,0,0.043731200695037845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,8,128,1,fp8,fp8,0,0.04385760128498077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,48,128,1,float16,float16,0,0.04380480051040649
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,48,128,1,float16,fp8,0,0.0311055988073349
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,48,128,1,fp8,fp8,0,0.03089280128479004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,1,128,1,float16,float16,0,0.021804800629615782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,1,128,1,float16,fp8,0,0.022710399329662324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,1,128,1,fp8,fp8,0,0.022729599475860597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,2,128,1,float16,float16,0,0.02205439954996109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,2,128,1,float16,fp8,0,0.02274080067873001
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,2,128,1,fp8,fp8,0,0.022838400304317476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,4,128,1,float16,float16,0,0.022407999634742735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,4,128,1,float16,fp8,0,0.022889600694179536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,4,128,1,fp8,fp8,0,0.022961600124835967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,8,128,1,float16,float16,0,0.0228752002120018
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,8,128,1,float16,fp8,0,0.023236800730228425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,48,8,128,1,fp8,fp8,0,0.02314079999923706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,48,128,1,float16,fp8,0,0.11828800439834594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,48,128,1,float16,float16,0,0.02234079986810684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,48,128,1,float16,fp8,0,0.01720000058412552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,48,128,1,fp8,fp8,0,0.01727840006351471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,1,128,1,float16,float16,0,0.017049600183963776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,1,128,1,float16,fp8,0,0.01762239933013916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,1,128,1,fp8,fp8,0,0.017532800137996674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,2,128,1,float16,float16,0,0.017195199429988862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,2,128,1,float16,fp8,0,0.01759999990463257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,4,128,1,float16,float16,0,0.017220799624919892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,4,128,1,float16,fp8,0,0.017713600397109987
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,4,128,1,fp8,fp8,0,0.01762399971485138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,8,128,1,float16,float16,0,0.017441600561141968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,8,128,1,float16,fp8,0,0.01767680048942566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,8,128,1,fp8,fp8,0,0.017632000148296356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,48,128,1,float16,float16,0,0.014920000731945039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,48,128,1,float16,fp8,0,0.013816000521183014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,48,128,1,fp8,fp8,0,0.013755199313163758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,1,128,1,float16,float16,0,0.012163200229406358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,1,128,1,float16,fp8,0,0.012614400684833526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,1,128,1,fp8,fp8,0,0.012544000148773193
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,2,128,1,float16,float16,0,0.012297599762678146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,2,128,1,float16,fp8,0,0.012718400359153748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,2,128,1,fp8,fp8,0,0.012878400087356568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,4,128,1,float16,float16,0,0.012505599856376648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,4,128,1,float16,fp8,0,0.01252480000257492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,8,128,1,float16,fp8,0,0.08254879713058472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,8,128,1,float16,float16,0,0.012415999919176102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,8,128,1,float16,fp8,0,0.012742400169372559
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,8,128,1,fp8,fp8,0,0.012862400710582733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,1,128,1,float16,fp8,0,0.039443200826644896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,48,128,1,float16,float16,0,0.011648000031709672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,48,128,1,float16,fp8,0,0.010971199721097946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,1,128,1,float16,float16,0,0.012164799869060517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,1,128,1,float16,fp8,0,0.012430399656295776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,1,128,1,fp8,fp8,0,0.012462399899959564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,2,128,1,float16,float16,0,0.011952000111341477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,2,128,1,fp8,fp8,0,0.012467200309038163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,4,128,1,float16,float16,0,0.01202239990234375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,4,128,1,float16,fp8,0,0.01236959993839264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,4,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,8,128,1,float16,float16,0,0.012238399684429168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,8,128,1,float16,fp8,0,0.01265919953584671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,8,128,1,fp8,fp8,0,0.01271039992570877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,48,128,1,float16,float16,0,0.011449600011110306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,48,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,48,128,1,fp8,fp8,0,0.010716799646615982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,1,128,1,float16,float16,0,0.011924800276756287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,1,128,1,float16,fp8,0,0.01249919980764389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,1,128,1,fp8,fp8,0,0.01225920021533966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,2,128,1,float16,float16,0,0.012068799883127212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,2,128,1,float16,fp8,0,0.012297599762678146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,2,128,1,fp8,fp8,0,0.012409599870443344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,4,128,1,float16,fp8,0,0.012558400630950928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,4,128,1,fp8,fp8,0,0.012356799840927125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,8,128,1,float16,float16,0,0.011919999867677689
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,8,128,1,float16,fp8,0,0.012382400035858155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,8,128,1,fp8,fp8,0,0.012179200351238251
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,48,2,128,1,fp8,fp8,0,0.07824159860610962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,1,128,1,float16,float16,0,0.13869279623031616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,1,128,1,float16,fp8,0,0.14385279417037963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,48,4,128,1,fp8,fp8,0,0.012600000202655792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,1,128,1,fp8,fp8,0,0.14371360540390016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,2,128,1,float16,float16,0,0.13994719982147216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,2,128,1,float16,fp8,0,0.1468943953514099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,48,2,128,1,float16,float16,0,0.043803200125694275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,2,128,1,fp8,fp8,0,0.1471343994140625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,2,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,4,128,1,float16,fp8,0,0.14911839962005616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,4,128,1,fp8,fp8,0,0.14963200092315673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,8,128,1,float16,float16,0,0.1805616021156311
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,8,128,1,float16,fp8,0,0.18438559770584106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,8,128,1,fp8,fp8,0,0.18462079763412476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,48,128,1,float16,float16,0,0.20208640098571778
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,48,128,1,fp8,fp8,0,0.17084800004959105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,1,128,1,float16,float16,0,0.07809919714927674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,1,128,1,float16,fp8,0,0.07905120253562928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,1,128,1,fp8,fp8,0,0.07913600206375122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,2,128,1,float16,float16,0,0.07841119766235352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,2,128,1,float16,fp8,0,0.07978879809379577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,2,128,1,fp8,fp8,0,0.07960799932479859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,4,128,1,float16,float16,0,0.08099679946899414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,48,4,128,1,float16,float16,0,0.011961600184440613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,4,128,1,float16,fp8,0,0.08090239763259888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,48,2,128,1,fp8,fp8,0,0.017401599884033205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,4,128,1,fp8,fp8,0,0.08125920295715332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,8,128,1,float16,float16,0,0.0979856014251709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,8,128,1,float16,fp8,0,0.098854398727417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,8,128,1,fp8,fp8,0,0.09777759909629821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,48,128,1,float16,float16,0,0.10834720134735107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,48,128,1,float16,fp8,0,0.09108960032463073
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,48,128,1,fp8,fp8,0,0.09119679927825927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,1,128,1,float16,fp8,0,0.039776000380516055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,1,128,1,fp8,fp8,0,0.039743998646736146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,2,128,1,float16,float16,0,0.04475519955158234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,2,128,1,float16,fp8,0,0.04010080099105835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,2,128,1,fp8,fp8,0,0.04023039937019348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,4,128,1,float16,float16,0,0.04604800045490265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,4,128,1,float16,fp8,0,0.04151360094547272
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,4,128,1,fp8,fp8,0,0.04106720089912415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,8,128,1,float16,float16,0,0.05518879890441895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,8,128,1,float16,fp8,0,0.05289120078086853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,8,128,1,fp8,fp8,0,0.05254240036010742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,48,128,1,float16,float16,0,0.058899199962615965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,48,128,1,float16,fp8,0,0.0463456004858017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,48,128,1,fp8,fp8,0,0.04609760046005249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,1,128,1,float16,float16,0,0.022006399929523468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,1,128,1,float16,fp8,0,0.02309119999408722
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,1,128,1,fp8,fp8,0,0.023131200671195985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,2,128,1,float16,float16,0,0.022168000042438508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,2,128,1,float16,fp8,0,0.023108799755573273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,2,128,1,fp8,fp8,0,0.02324800044298172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,4,128,1,float16,float16,0,0.022470399737358093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,4,128,1,float16,fp8,0,0.023204800486564637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,4,128,1,fp8,fp8,0,0.02327840030193329
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,48,48,128,1,fp8,fp8,0,0.010785599797964096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,8,128,1,float16,fp8,0,0.027665600180625916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,8,128,1,fp8,fp8,0,0.027599999308586122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,48,128,1,float16,fp8,0,0.024771200120449068
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,48,4,128,1,float16,float16,0,0.14512640237808228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,1,128,1,float16,float16,0,0.017008000612258913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,1,128,1,float16,fp8,0,0.01780479997396469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,1,128,1,fp8,fp8,0,0.017652800679206847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,2,128,1,float16,float16,0,0.017190399765968322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,2,128,1,float16,fp8,0,0.01767359972000122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,2,128,1,fp8,fp8,0,0.01759839951992035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,4,128,1,float16,float16,0,0.017257599532604216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,4,128,1,float16,fp8,0,0.017718400061130523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,4,128,1,fp8,fp8,0,0.017574399709701538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,8,128,1,float16,float16,0,0.01729599982500076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,48,48,128,1,float16,fp8,0,0.17149280309677123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,8,128,1,fp8,fp8,0,0.0177824005484581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,48,128,1,float16,float16,0,0.017822399735450745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,48,128,1,float16,fp8,0,0.016256000101566314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,48,128,1,fp8,fp8,0,0.01611520051956177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,1,128,1,float16,float16,0,0.01247519999742508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,1,128,1,float16,fp8,0,0.01257600039243698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,1,128,1,fp8,fp8,0,0.013031999766826629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,2,128,1,float16,float16,0,0.012379200011491776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,2,128,1,float16,fp8,0,0.012980799376964568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,2,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,4,128,1,float16,float16,0,0.01268479973077774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,4,128,1,float16,fp8,0,0.012727999687194824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,4,128,1,fp8,fp8,0,0.012923200428485871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,8,128,1,float16,float16,0,0.012608000636100769
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,8,128,1,fp8,fp8,0,0.012932799756526947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,48,128,1,float16,float16,0,0.014084799587726593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,48,1,128,1,float16,float16,0,0.04321439862251282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,48,128,1,fp8,fp8,0,0.012996800243854523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,1,128,1,float16,float16,0,0.012104000151157378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,1,128,1,float16,fp8,0,0.012372799962759019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,2,128,1,float16,float16,0,0.011825600266456604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,2,128,1,float16,fp8,0,0.012172800302505494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,2,128,1,fp8,fp8,0,0.01228479966521263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,4,128,1,float16,float16,0,0.01213119998574257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,4,128,1,float16,fp8,0,0.012368000298738479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,48,8,128,1,float16,float16,0,0.026862400770187377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,4,128,1,fp8,fp8,0,0.01218400001525879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,8,128,1,float16,float16,0,0.012222400307655335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,8,128,1,float16,fp8,0,0.012195199728012085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,48,128,1,fp8,fp8,0,0.024566400051116943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,8,128,1,fp8,fp8,0,0.012468799948692322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,48,128,1,float16,float16,0,0.011363200098276138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,48,128,1,fp8,fp8,0,0.010704000294208527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,1,128,1,float16,float16,0,0.011767999827861786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,1,128,1,float16,fp8,0,0.012358400225639343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,8,128,1,float16,fp8,0,0.01764000058174133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,1,128,1,fp8,fp8,0,0.012459199875593185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,2,128,1,float16,fp8,0,0.012436799705028534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,2,128,1,float16,float16,0,0.011819200217723846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,2,128,1,fp8,fp8,0,0.012372799962759019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,4,128,1,float16,float16,0,0.0118928000330925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,4,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,4,128,1,fp8,fp8,0,0.012417600303888322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,8,128,1,float16,float16,0,0.011913599818944931
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,8,128,1,float16,fp8,0,0.012353599816560746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,8,128,1,fp8,fp8,0,0.012408000230789185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,48,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,48,128,1,fp8,fp8,0,0.010632000118494033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,1,128,1,float16,float16,0,0.011907199770212174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,48,8,128,1,float16,fp8,0,0.013070400059223174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,1,128,1,float16,fp8,0,0.012142399698495865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,2,128,1,float16,float16,0,0.011689600348472596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,2,128,1,float16,fp8,0,0.011984000355005265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,2,128,1,fp8,fp8,0,0.011905600130558015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,4,128,1,float16,float16,0,0.011801599711179733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,4,128,1,float16,fp8,0,0.011926399916410447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,4,128,1,fp8,fp8,0,0.01212640032172203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,1,128,1,fp8,fp8,0,0.012331199645996094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,8,128,1,float16,fp8,0,0.012049599736928939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,8,128,1,fp8,fp8,0,0.012097600102424621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,1,128,1,float16,float16,0,0.07844160199165344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,1,128,1,float16,fp8,0,0.07999680042266846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,1,128,1,fp8,fp8,0,0.07998560070991516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,48,48,128,1,float16,float16,0,0.03052479922771454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,2,128,1,float16,float16,0,0.07948639988899231
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,48,48,128,1,float16,fp8,0,0.010907199978828431
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,2,128,1,float16,fp8,0,0.08016319870948792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,2,128,1,fp8,fp8,0,0.08034560084342957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,4,128,1,float16,fp8,0,0.09752479791641236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,4,128,1,float16,float16,0,0.09385120272636413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,4,128,1,fp8,fp8,0,0.09793760180473328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,8,128,1,float16,float16,0,0.10114079713821411
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,8,128,1,fp8,fp8,0,0.1002959966659546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,48,128,1,float16,float16,0,0.16972800493240356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,48,128,1,float16,fp8,0,0.1496224045753479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,48,128,1,float16,float16,0,0.01125440001487732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,48,128,1,fp8,fp8,0,0.14903520345687865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,1,128,1,float16,fp8,0,0.04068160057067871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,1,128,1,fp8,fp8,0,0.040078398585319516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,2,128,1,float16,fp8,0,0.04046559929847717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,2,128,1,fp8,fp8,0,0.0405023992061615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,48,48,128,1,float16,fp8,0,0.013104000687599182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,4,128,1,float16,float16,0,0.05347359776496887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,4,128,1,float16,fp8,0,0.050251197814941403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,8,128,1,float16,float16,0,0.011899200081825257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,8,128,1,float16,float16,0,0.05598400235176086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,8,128,1,fp8,fp8,0,0.05401920080184937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,48,128,1,float16,float16,0,0.0913807988166809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,48,128,1,float16,fp8,0,0.07669919729232788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,48,128,1,fp8,fp8,0,0.07610719799995422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,1,128,1,float16,float16,0,0.02242400050163269
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,1,128,1,float16,fp8,0,0.023337599635124207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,1,128,1,fp8,fp8,0,0.02359520047903061
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,2,128,1,float16,float16,0,0.022566400468349457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,2,128,1,float16,fp8,0,0.023507200181484222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,4,128,1,float16,float16,0,0.026372799277305604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,4,128,1,float16,fp8,0,0.027790400385856628
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,4,128,1,fp8,fp8,0,0.027750399708747864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,8,128,1,float16,float16,0,0.026740801334381104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,48,8,128,1,float16,fp8,0,0.10029120445251465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,8,128,1,float16,fp8,0,0.027641600370407103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,8,128,1,fp8,fp8,0,0.02791520059108734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,48,128,1,float16,float16,0,0.04457120001316071
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,48,128,1,float16,fp8,0,0.04038400053977966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,48,128,1,fp8,fp8,0,0.04015519917011261
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,1,128,1,float16,float16,0,0.04412800073623657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,1,128,1,float16,fp8,0,0.017888000607490538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,1,128,1,fp8,fp8,0,0.017843200266361235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,2,128,1,float16,float16,0,0.04565759897232056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,48,1,128,1,fp8,fp8,0,0.012129600346088409
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,2,128,1,float16,fp8,0,0.017854399979114532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,2,128,1,fp8,fp8,0,0.017977599799633027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,4,128,1,float16,float16,0,0.01751199960708618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,4,128,1,fp8,fp8,0,0.01796800047159195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,8,128,1,float16,float16,0,0.0178399994969368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,8,128,1,float16,fp8,0,0.017759999632835387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,4,128,1,fp8,fp8,0,0.05034400224685669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,8,128,1,fp8,fp8,0,0.018078400194644927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,48,128,1,float16,fp8,0,0.023553599417209626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,48,128,1,fp8,fp8,0,0.02398719936609268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,1,128,1,float16,float16,0,0.012172800302505494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,1,128,1,float16,fp8,0,0.012876799702644348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,1,128,1,fp8,fp8,0,0.012636800110340119
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,48,8,128,1,float16,fp8,0,0.053908801078796385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,2,128,1,float16,fp8,0,0.012433599680662155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,4,128,1,float16,float16,0,0.01241919994354248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,4,128,1,float16,fp8,0,0.01266240030527115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,4,128,1,fp8,fp8,0,0.012670400738716125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,48,2,128,1,fp8,fp8,0,0.02332800030708313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,8,128,1,float16,float16,0,0.012398400157690049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,8,128,1,float16,fp8,0,0.012654399871826172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,8,128,1,fp8,fp8,0,0.012695999443531036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,48,128,1,float16,float16,0,0.01698880046606064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,48,128,1,float16,fp8,0,0.015852800011634825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,48,128,1,fp8,fp8,0,0.015889599919319153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,1,128,1,float16,float16,0,0.012230399996042252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,1,128,1,float16,fp8,0,0.012350399792194367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,1,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,2,128,1,float16,float16,0,0.012059199810028075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,1,128,1,float16,float16,0,0.017153599858283998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,2,128,1,float16,fp8,0,0.012398400157690049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,2,128,1,fp8,fp8,0,0.012353599816560746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,4,128,1,float16,float16,0,0.012116800248622894
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,2,128,1,float16,float16,0,0.017428800463676453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,8,128,1,float16,float16,0,0.012163200229406358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,48,4,128,1,float16,fp8,0,0.017800000309944154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,8,128,1,fp8,fp8,0,0.012188799679279327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,48,128,1,float16,float16,0,0.013627199828624726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,48,128,1,float16,fp8,0,0.012878400087356568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,48,128,1,fp8,fp8,0,0.012967999279499053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,1,128,1,float16,float16,0,0.011926399916410447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,2,128,1,float16,float16,0,0.012628799676895142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,1,128,1,float16,fp8,0,0.012336000055074691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,1,128,1,fp8,fp8,0,0.012439999729394913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,2,128,1,fp8,fp8,0,0.012705600261688233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,2,128,1,float16,fp8,0,0.012326399981975555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,2,128,1,fp8,fp8,0,0.012368000298738479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,4,128,1,float16,float16,0,0.01175839975476265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,4,128,1,float16,fp8,0,0.01226079985499382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,8,128,1,float16,float16,0,0.011574400216341018
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,8,128,1,float16,fp8,0,0.012332800030708312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,8,128,1,fp8,fp8,0,0.012150400131940842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,48,128,1,float16,float16,0,0.011327999830245971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,48,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,48,128,1,fp8,fp8,0,0.01067039966583252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,1,128,1,float16,float16,0,0.011963199824094772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,1,128,1,float16,fp8,0,0.01207519993185997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,1,128,1,fp8,fp8,0,0.012012799829244613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,2,128,1,float16,float16,0,0.01159520000219345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,2,128,1,float16,fp8,0,0.012099199742078782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,2,128,1,fp8,fp8,0,0.012006399780511856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,4,128,1,float16,float16,0,0.011745599657297134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,4,128,1,float16,fp8,0,0.012104000151157378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,4,128,1,fp8,fp8,0,0.012027200311422348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,8,128,1,float16,float16,0,0.01154239997267723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,8,128,1,float16,fp8,0,0.011697600036859513
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,48,8,128,1,fp8,fp8,0,0.01183520033955574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,48,128,1,float16,float16,0,0.010943999886512757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,4,128,1,float16,fp8,0,0.012324800342321396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,48,128,1,fp8,fp8,0,0.01026879996061325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,1,128,1,float16,float16,0,0.01135680004954338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,1,128,1,float16,fp8,0,0.011771199852228164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,48,48,128,1,float16,float16,0,0.025332799553871153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,1,128,1,fp8,fp8,0,0.011796800047159195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,2,128,1,float16,float16,0,0.011580800265073776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,2,128,1,fp8,fp8,0,0.011907199770212174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,4,128,1,float16,float16,0,0.011630400270223617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,2,128,1,float16,float16,0,0.011931200325489045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,4,128,1,float16,fp8,0,0.011776000261306763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,4,128,1,fp8,fp8,0,0.011844799667596818
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,8,128,1,float16,float16,0,0.011321599781513213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,8,128,1,float16,fp8,0,0.01162080019712448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,8,128,1,fp8,fp8,0,0.01175680011510849
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,48,4,128,1,fp8,fp8,0,0.01228640004992485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,1,128,1,float16,float16,0,0.017766399681568144
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,1,128,1,float16,fp8,0,0.017820799350738527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,2,128,1,float16,float16,0,0.023947200179100035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,2,128,1,float16,fp8,0,0.023366400599479677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,2,128,1,fp8,fp8,0,0.023455999791622162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,4,128,1,float16,float16,0,0.035120001435279845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,4,128,1,float16,fp8,0,0.03442240059375763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,4,128,1,fp8,fp8,0,0.034646400809288026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,8,128,1,float16,fp8,0,0.05573599934577942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,8,128,1,fp8,fp8,0,0.056220799684524536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,48,128,1,float16,float16,0,0.12661759853363036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,48,128,1,float16,fp8,0,0.12528959512710572
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,1,128,1,float16,float16,0,0.012787200510501862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,48,128,1,fp8,fp8,0,0.124344003200531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,1,128,1,float16,fp8,0,0.012740799784660339
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,1,128,1,fp8,fp8,0,0.01276479959487915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,2,128,1,float16,float16,0,0.015447999536991119
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,2,128,1,float16,fp8,0,0.015225599706172942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,2,128,1,fp8,fp8,0,0.015412800014019012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,4,128,1,float16,float16,0,0.02105119973421097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,4,128,1,float16,fp8,0,0.020927999913692475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,4,128,1,fp8,fp8,0,0.02094080001115799
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,8,128,1,float16,float16,0,0.03227199912071228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,8,128,1,float16,fp8,0,0.03171519935131073
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,48,8,128,1,fp8,fp8,0,0.03175039887428284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,48,128,1,float16,float16,0,0.06841920018196106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,48,128,1,float16,fp8,0,0.06761599779129028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,48,128,1,fp8,fp8,0,0.06721439957618713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,4,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,1,128,1,fp8,fp8,0,0.011379200220108032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,48,8,128,1,float16,fp8,0,0.01242400035262108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,2,128,1,float16,float16,0,0.011247999966144562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,2,128,1,float16,fp8,0,0.011791999638080596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,4,128,1,float16,float16,0,0.014227199554443359
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,2,128,1,float16,fp8,0,0.011796800047159195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,4,128,1,float16,fp8,0,0.014183999598026275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,4,128,1,fp8,fp8,0,0.014192000031471252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,8,128,1,float16,float16,0,0.019750399887561797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,8,128,1,float16,fp8,0,0.019436800479888917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,8,128,1,fp8,fp8,0,0.0194255992770195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,1,128,1,fp8,fp8,0,0.018268799781799315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,48,128,1,float16,fp8,0,0.03842560052871704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,48,128,1,fp8,fp8,0,0.03841759860515594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,1,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,1,128,1,float16,fp8,0,0.01032159999012947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,1,128,1,fp8,fp8,0,0.010356800258159637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,2,128,1,float16,float16,0,0.010609599947929382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,2,128,1,float16,fp8,0,0.010630399733781815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,2,128,1,fp8,fp8,0,0.010289599746465683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,4,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,4,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,4,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,8,128,1,float16,float16,0,0.013073599338531494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,8,128,1,float16,fp8,0,0.013078400492668152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,8,128,1,fp8,fp8,0,0.013177600502967835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,48,128,1,float16,float16,0,0.024240000545978545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,48,128,1,float16,fp8,0,0.023004800081253052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,1,128,1,float16,float16,0,0.011380799859762192
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,48,128,1,fp8,fp8,0,0.023017600178718567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,1,128,1,float16,float16,0,0.01010880023241043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,1,128,1,float16,fp8,0,0.010102400183677673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,1,128,1,fp8,fp8,0,0.009895999729633332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,2,128,1,float16,fp8,0,0.009841600060462951
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,2,128,1,fp8,fp8,0,0.011351999640464783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,2,128,1,fp8,fp8,0,0.00981760025024414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,4,128,1,float16,float16,0,0.010028800368309021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,4,128,1,float16,fp8,0,0.009998399764299393
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,4,128,1,fp8,fp8,0,0.009948799759149552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,8,128,1,float16,float16,0,0.010208000242710114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,8,128,1,float16,fp8,0,0.010239999741315842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,48,128,1,float16,float16,0,0.016334399580955505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,48,128,1,float16,fp8,0,0.015440000593662262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,48,128,1,fp8,fp8,0,0.015401600301265717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,1,128,1,float16,float16,0,0.00974240005016327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,1,128,1,float16,fp8,0,0.009726399928331375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,48,48,128,1,float16,float16,0,0.03918400108814239
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,1,128,1,fp8,fp8,0,0.00968960002064705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,2,128,1,float16,float16,0,0.009812799841165542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,2,128,1,float16,fp8,0,0.009841600060462951
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,2,128,1,fp8,fp8,0,0.009702400118112565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,4,128,1,float16,float16,0,0.009956800192594529
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,4,128,1,float16,fp8,0,0.009888000041246413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,48,8,128,1,float16,float16,0,0.05648959875106811
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,8,128,1,float16,float16,0,0.01013600006699562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,8,128,1,float16,fp8,0,0.010132800042629241
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,48,128,1,float16,float16,0,0.013531200587749481
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,48,128,1,float16,fp8,0,0.012697599828243256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,48,128,1,fp8,fp8,0,0.01273760050535202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,1,128,1,float16,float16,0,0.009596800059080124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,1,128,1,float16,fp8,0,0.009524799883365631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,1,128,1,fp8,fp8,0,0.009545599669218063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,2,128,1,float16,float16,0,0.009743999689817429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,2,128,1,float16,fp8,0,0.009497600048780442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,2,128,1,fp8,fp8,0,0.00945120006799698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,4,128,1,float16,float16,0,0.009723199903964997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,4,128,1,float16,fp8,0,0.00952960029244423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,48,1,128,1,float16,fp8,0,0.01130400002002716
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,4,128,1,fp8,fp8,0,0.00963359996676445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,8,128,1,float16,float16,0,0.009811200201511383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,2,128,1,float16,float16,0,0.010344000160694122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,8,128,1,fp8,fp8,0,0.009609600156545639
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,48,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,48,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,1,128,1,float16,float16,0,0.009603200107812881
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,1,128,1,float16,fp8,0,0.009331200271844864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,48,8,128,1,fp8,fp8,0,0.010288000106811523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,1,128,1,fp8,fp8,0,0.009391999989748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,2,128,1,float16,float16,0,0.00968799963593483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,2,128,1,fp8,fp8,0,0.009515199810266495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,4,128,1,float16,float16,0,0.009747199714183807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,4,128,1,float16,fp8,0,0.009641599655151368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,4,128,1,fp8,fp8,0,0.009600000083446502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,8,128,1,float16,float16,0,0.009691199660301209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,8,128,1,fp8,fp8,0,0.00947680026292801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,4,128,1,fp8,fp8,0,0.009924799948930741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,48,128,1,float16,float16,0,0.01061440035700798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,48,128,1,float16,fp8,0,0.010252799838781357
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,48,8,128,1,fp8,fp8,0,0.010078399628400802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,48,128,1,fp8,fp8,0,0.010190399736166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,1,128,1,float16,fp8,0,0.00931359976530075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,1,128,1,fp8,fp8,0,0.0095551997423172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,2,128,1,float16,float16,0,0.0095040000975132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,2,128,1,float16,fp8,0,0.009596800059080124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,2,128,1,fp8,fp8,0,0.009441599994897843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,4,128,1,float16,float16,0,0.00952640026807785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,4,128,1,fp8,fp8,0,0.009467200189828873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,8,128,1,float16,float16,0,0.009524799883365631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,8,128,1,float16,fp8,0,0.009361600130796432
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,8,128,1,fp8,fp8,0,0.009328000247478485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,48,48,128,1,float16,fp8,0,0.010204800218343735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,48,8,128,1,float16,fp8,0,0.009881599992513656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,48,128,1,float16,float16,0,0.011044800281524658
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,2,128,1,float16,fp8,0,0.009487999975681305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,48,8,128,1,float16,fp8,0,0.009484799951314926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,1,128,1,float16,float16,0,0.009692800045013428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,48,4,128,1,float16,fp8,0,0.009436800330877303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,1,128,1,fp8,fp8,0,29.257083129882812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,2,128,1,float16,fp8,0,29.514105224609374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,2,128,1,fp8,fp8,0,29.857476806640626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,1,128,1,float16,fp8,0,29.942416381835937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,1,128,1,float16,float16,0,38.65714721679687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,2,128,1,float16,float16,0,39.14242248535156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,4,128,1,float16,float16,0,38.81244812011719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,4,128,1,float16,fp8,0,29.61611328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,4,128,1,fp8,fp8,0,30.10555419921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,1,128,1,float16,float16,0,19.8453369140625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,8,128,1,float16,fp8,0,30.444219970703124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,8,128,1,fp8,fp8,0,30.37110290527344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,40,8,128,1,float16,float16,0,38.23389587402344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,1,128,1,float16,fp8,0,15.005329895019532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,1,128,1,fp8,fp8,0,14.751528930664062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,2,128,1,float16,fp8,0,15.8171142578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,2,128,1,float16,float16,0,19.911013793945312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,2,128,1,fp8,fp8,0,15.24150390625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,4,128,1,float16,fp8,0,15.186244201660156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,4,128,1,float16,float16,0,19.073715209960938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,4,128,1,fp8,fp8,0,15.172206115722656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,8,128,1,float16,fp8,0,15.08685760498047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,8,128,1,float16,float16,0,19.381390380859376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,8,128,1,fp8,fp8,0,15.238691711425782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,1,128,1,float16,float16,0,9.709375762939453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,1,128,1,float16,fp8,0,7.39734115600586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,1,128,1,fp8,fp8,0,7.498356628417969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,2,128,1,float16,float16,0,9.622456359863282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,2,128,1,float16,fp8,0,7.536262512207031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,2,128,1,fp8,fp8,0,7.5583984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,4,128,1,float16,float16,0,9.790654754638672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,4,128,1,float16,fp8,0,7.497068786621094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,4,128,1,fp8,fp8,0,7.517857360839844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,8,128,1,float16,float16,0,9.87603988647461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,8,128,1,float16,fp8,0,7.738452911376953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,8,128,1,fp8,fp8,0,7.506499481201172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,40,128,1,float16,fp8,0,3.7167343139648437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,40,128,1,float16,float16,0,4.753833770751953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,40,128,1,float16,fp8,0,7.721523284912109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,40,128,1,float16,float16,0,9.625759887695313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,40,128,1,fp8,fp8,0,3.7945278167724608
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,1,128,1,float16,float16,0,4.679052734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,1,128,1,float16,fp8,0,3.6896942138671873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,40,40,128,1,fp8,fp8,0,7.561144256591797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,1,128,1,fp8,fp8,0,3.730588912963867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,40,128,1,float16,fp8,0,15.593905639648437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,2,128,1,float16,float16,0,4.645966339111328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,2,128,1,float16,fp8,0,3.7794113159179688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,40,128,1,fp8,fp8,0,15.325515747070312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,2,128,1,fp8,fp8,0,3.695779037475586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,4,128,1,float16,fp8,0,3.6609920501708983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,4,128,1,float16,float16,0,4.2917633056640625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,4,128,1,fp8,fp8,0,3.70837287902832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,8,128,1,float16,fp8,0,3.7412368774414064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,8,128,1,float16,float16,0,4.502225494384765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,40,8,128,1,fp8,fp8,0,3.693404769897461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,40,40,128,1,float16,float16,0,19.84075164794922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,1,128,1,float16,fp8,0,16.735757446289064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,1,128,1,fp8,fp8,0,16.789361572265626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,2,128,1,float16,fp8,0,17.226625061035158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,2,128,1,fp8,fp8,0,16.985105895996092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,4,128,1,float16,fp8,0,17.489524841308594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,1,128,1,float16,float16,0,21.7808349609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,2,128,1,float16,float16,0,22.646037292480468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,4,128,1,float16,float16,0,22.554170227050783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,40,128,1,float16,float16,0,11.804499053955078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,40,128,1,float16,fp8,0,8.980092620849609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,40,128,1,fp8,fp8,0,8.969859313964843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,1,128,1,float16,float16,0,10.695051574707032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,4,128,1,fp8,fp8,0,16.86060791015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,8,128,1,float16,fp8,0,17.605625915527344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,8,128,1,fp8,fp8,0,17.28147735595703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,1,128,1,float16,fp8,0,8.569757080078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,40,8,128,1,float16,float16,0,22.30967559814453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,1,128,1,fp8,fp8,0,9.000921630859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,2,128,1,float16,fp8,0,8.497825622558594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,2,128,1,fp8,fp8,0,8.732227325439453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,2,128,1,float16,float16,0,10.976382446289062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,4,128,1,float16,fp8,0,8.584636688232422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,4,128,1,float16,float16,0,11.224956512451172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,4,128,1,fp8,fp8,0,8.634249877929687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,40,128,1,float16,float16,0,5.739860916137696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,8,128,1,float16,fp8,0,8.680570983886719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,40,128,1,fp8,fp8,0,4.396128082275391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,8,128,1,float16,float16,0,10.820470428466797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,40,8,128,1,fp8,fp8,0,8.539956665039062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,40,128,1,float16,fp8,0,4.707588958740234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,1,128,1,float16,float16,0,5.388553619384766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,1,128,1,float16,fp8,0,4.305716705322266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,1,128,1,fp8,fp8,0,4.223886489868164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,2,128,1,float16,fp8,0,4.267228698730468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,2,128,1,float16,float16,0,5.484857559204102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,2,128,1,fp8,fp8,0,4.249126434326172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,4,128,1,float16,fp8,0,4.231390380859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,4,128,1,float16,float16,0,5.382467269897461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,4,128,1,fp8,fp8,0,4.357737731933594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,40,128,1,float16,fp8,0,2.1786256790161134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,40,128,1,float16,float16,0,2.8748159408569336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,8,128,1,float16,fp8,0,4.208489608764649
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,8,128,1,fp8,fp8,0,4.380964660644532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,40,8,128,1,float16,float16,0,5.720478439331055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,40,128,1,fp8,fp8,0,2.2088943481445313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,1,128,1,float16,fp8,0,2.1039215087890626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,1,128,1,float16,float16,0,2.852729606628418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,1,128,1,fp8,fp8,0,2.082771110534668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,2,128,1,float16,float16,0,2.5053152084350585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,2,128,1,fp8,fp8,0,2.342958450317383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,4,128,1,float16,fp8,0,2.0962432861328124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,4,128,1,float16,float16,0,2.5958431243896483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,4,128,1,fp8,fp8,0,2.338345527648926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,8,128,1,float16,float16,0,2.5588048934936523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,8,128,1,float16,fp8,0,2.295031929016113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,8,128,1,fp8,fp8,0,2.1577327728271483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,40,2,128,1,float16,fp8,0,2.1444400787353515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,1,128,1,float16,fp8,0,12.026052856445313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,2,128,1,float16,fp8,0,11.88950424194336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,2,128,1,fp8,fp8,0,12.366455841064454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,1,128,1,float16,float16,0,15.354623413085937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,2,128,1,float16,float16,0,15.384698486328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,1,128,1,fp8,fp8,0,12.036466979980469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,4,128,1,float16,fp8,0,12.103929901123047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,4,128,1,float16,float16,0,15.120742797851562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,40,128,1,float16,fp8,0,6.2109424591064455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,40,128,1,fp8,fp8,0,6.279620742797851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,40,128,1,float16,float16,0,8.101286315917969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,4,128,1,fp8,fp8,0,12.148873901367187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,1,128,1,float16,float16,0,7.388295745849609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,8,128,1,float16,fp8,0,12.228321838378907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,8,128,1,fp8,fp8,0,12.261894226074219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,40,8,128,1,float16,float16,0,15.740548706054687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,1,128,1,float16,fp8,0,5.930803298950195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,1,128,1,fp8,fp8,0,5.954732894897461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,2,128,1,fp8,fp8,0,5.975664138793945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,2,128,1,float16,fp8,0,6.242272186279297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,2,128,1,float16,float16,0,7.594881439208985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,4,128,1,float16,fp8,0,6.082571029663086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,4,128,1,float16,float16,0,8.083190155029296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,4,128,1,fp8,fp8,0,5.92039680480957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,40,128,1,float16,float16,0,4.036151885986328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,40,128,1,float16,fp8,0,3.207564926147461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,8,128,1,float16,fp8,0,6.098912048339844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,8,128,1,float16,float16,0,7.640393829345703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,40,128,1,fp8,fp8,0,3.102396774291992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,40,8,128,1,fp8,fp8,0,6.2407489776611325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,1,128,1,float16,fp8,0,2.905539131164551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,1,128,1,float16,float16,0,3.772230529785156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,1,128,1,fp8,fp8,0,3.0993040084838865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,2,128,1,float16,float16,0,3.6467681884765626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,2,128,1,float16,fp8,0,3.158897590637207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,2,128,1,fp8,fp8,0,3.0667776107788085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,4,128,1,float16,fp8,0,3.0006687164306642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,4,128,1,float16,float16,0,3.8157585144042967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,4,128,1,fp8,fp8,0,2.934499168395996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,40,128,1,float16,fp8,0,1.574233627319336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,40,128,1,float16,float16,0,2.1097679138183594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,8,128,1,float16,fp8,0,2.98361759185791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,40,128,1,fp8,fp8,0,1.5619232177734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,8,128,1,float16,float16,0,3.9131393432617188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,40,8,128,1,fp8,fp8,0,3.1009552001953127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,1,128,1,float16,fp8,0,1.4732095718383789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,1,128,1,float16,float16,0,1.8859359741210937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,1,128,1,fp8,fp8,0,1.468336009979248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,2,128,1,float16,fp8,0,1.5037232398986817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,2,128,1,fp8,fp8,0,1.4519264221191406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,2,128,1,float16,float16,0,1.9865135192871093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,4,128,1,fp8,fp8,0,1.4613023757934571
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,4,128,1,float16,fp8,0,1.4720751762390136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,8,128,1,float16,fp8,0,1.466163158416748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,8,128,1,float16,float16,0,1.8697248458862306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,8,128,1,fp8,fp8,0,1.4902095794677734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,40,4,128,1,float16,float16,0,1.8965440750122071
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,1,128,1,fp8,fp8,0,15.28638916015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,1,128,1,float16,fp8,0,15.682749938964843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,2,128,1,float16,fp8,0,15.88439483642578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,2,128,1,fp8,fp8,0,15.76031951904297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,1,128,1,float16,float16,0,19.73926544189453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,2,128,1,float16,float16,0,19.876370239257813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,4,128,1,float16,fp8,0,15.788507080078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,4,128,1,float16,float16,0,20.549066162109376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,40,128,1,float16,fp8,0,8.241356658935548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,40,128,1,fp8,fp8,0,8.293672180175781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,40,128,1,float16,float16,0,10.651409912109376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,4,128,1,fp8,fp8,0,15.837924194335937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,1,128,1,float16,float16,0,10.172049713134765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,8,128,1,float16,fp8,0,15.824517822265625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,8,128,1,fp8,fp8,0,16.018577575683594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,40,8,128,1,float16,float16,0,20.626158142089842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,1,128,1,float16,fp8,0,7.699449920654297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,1,128,1,fp8,fp8,0,7.801204681396484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,2,128,1,float16,fp8,0,7.715284729003907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,2,128,1,fp8,fp8,0,7.8476112365722654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,2,128,1,float16,float16,0,9.735733032226562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,4,128,1,float16,fp8,0,8.017829132080077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,4,128,1,float16,float16,0,10.097550201416016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,40,128,1,float16,fp8,0,4.1179969787597654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,4,128,1,fp8,fp8,0,7.9370880126953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,8,128,1,float16,fp8,0,8.41506576538086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,40,128,1,fp8,fp8,0,4.101279830932617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,8,128,1,float16,float16,0,9.933153533935547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,40,8,128,1,fp8,fp8,0,7.74383544921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,1,128,1,float16,float16,0,4.937393569946289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,1,128,1,float16,fp8,0,3.8246814727783205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,1,128,1,fp8,fp8,0,3.8646160125732423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,40,128,1,float16,float16,0,5.201505661010742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,2,128,1,float16,float16,0,5.021231842041016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,2,128,1,float16,fp8,0,3.9075889587402344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,2,128,1,fp8,fp8,0,3.809273529052734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,4,128,1,float16,fp8,0,3.845105743408203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,4,128,1,fp8,fp8,0,3.9239360809326174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,40,128,1,float16,float16,0,2.508644866943359
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,8,128,1,float16,float16,0,4.906219100952148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,8,128,1,fp8,fp8,0,3.930179214477539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,8,128,1,float16,fp8,0,4.055126571655274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,40,128,1,float16,fp8,0,2.0111343383789064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,40,128,1,fp8,fp8,0,2.0348848342895507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,1,128,1,float16,fp8,0,2.1456367492675783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,1,128,1,fp8,fp8,0,1.9522048950195312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,40,4,128,1,float16,float16,0,5.028035354614258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,1,128,1,float16,float16,0,2.439409637451172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,2,128,1,float16,fp8,0,1.889249610900879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,2,128,1,fp8,fp8,0,1.9183647155761718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,2,128,1,float16,float16,0,2.3015344619750975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,4,128,1,float16,float16,0,2.3540975570678713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,40,128,1,float16,float16,0,1.1713616371154785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,40,128,1,float16,fp8,0,1.1644304275512696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,4,128,1,float16,fp8,0,1.9484800338745116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,4,128,1,fp8,fp8,0,1.9283584594726562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,40,128,1,fp8,fp8,0,1.1215744018554688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,8,128,1,fp8,fp8,0,1.9142160415649414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,8,128,1,float16,float16,0,2.1968223571777346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,40,8,128,1,float16,fp8,0,2.071327972412109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,1,128,1,float16,float16,0,1.1852527618408204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,1,128,1,float16,fp8,0,1.0218527793884278
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,1,128,1,fp8,fp8,0,0.9789792060852051
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,2,128,1,float16,float16,0,1.1188799858093261
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,2,128,1,float16,fp8,0,1.007151985168457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,4,128,1,float16,fp8,0,0.9860960006713867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,4,128,1,float16,float16,0,1.212820816040039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,4,128,1,fp8,fp8,0,1.0231247901916505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,8,128,1,float16,fp8,0,0.9587759971618652
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,8,128,1,float16,float16,0,1.1272128105163575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,8,128,1,fp8,fp8,0,1.0254544258117675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,40,2,128,1,fp8,fp8,0,0.9707551956176758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,1,128,1,float16,fp8,0,8.786811065673827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,1,128,1,fp8,fp8,0,8.854360198974609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,2,128,1,float16,fp8,0,9.021880340576171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,2,128,1,fp8,fp8,0,9.057441711425781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,1,128,1,float16,float16,0,11.543144226074219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,2,128,1,float16,float16,0,11.632324981689454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,4,128,1,float16,float16,0,11.524820709228516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,4,128,1,float16,fp8,0,9.023099517822265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,40,128,1,float16,fp8,0,4.768086242675781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,40,128,1,fp8,fp8,0,4.760776138305664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,40,128,1,float16,float16,0,6.524394989013672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,4,128,1,fp8,fp8,0,9.03429946899414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,1,128,1,float16,float16,0,5.507767868041992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,8,128,1,float16,fp8,0,9.151628875732422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,8,128,1,fp8,fp8,0,9.474591827392578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,40,8,128,1,float16,float16,0,11.541145324707031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,1,128,1,float16,fp8,0,4.455926513671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,1,128,1,fp8,fp8,0,4.395331192016601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,2,128,1,float16,fp8,0,4.394910430908203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,2,128,1,fp8,fp8,0,4.5093025207519535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,2,128,1,float16,float16,0,5.732217788696289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,4,128,1,float16,fp8,0,4.602644729614258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,4,128,1,float16,float16,0,5.732241439819336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,4,128,1,fp8,fp8,0,4.543388748168946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,40,128,1,float16,float16,0,2.9851776123046876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,40,128,1,float16,fp8,0,2.468199920654297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,8,128,1,float16,fp8,0,4.471779251098633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,40,128,1,fp8,fp8,0,2.4456783294677735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,8,128,1,float16,float16,0,5.794492721557617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,40,8,128,1,fp8,fp8,0,4.45722541809082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,1,128,1,float16,float16,0,3.020787239074707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,1,128,1,float16,fp8,0,2.2010208129882813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,1,128,1,fp8,fp8,0,2.3449071884155273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,2,128,1,float16,fp8,0,2.2360191345214844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,2,128,1,fp8,fp8,0,2.27109432220459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,2,128,1,float16,float16,0,2.7171247482299803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,4,128,1,float16,fp8,0,2.225713539123535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,4,128,1,float16,float16,0,2.7769071578979494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,4,128,1,fp8,fp8,0,2.52423038482666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,40,128,1,float16,fp8,0,1.2807600021362304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,40,128,1,float16,float16,0,1.4491056442260741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,40,128,1,fp8,fp8,0,1.178166389465332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,8,128,1,float16,fp8,0,2.224095916748047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,8,128,1,fp8,fp8,0,2.246668815612793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,1,128,1,float16,float16,0,1.3782832145690918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,1,128,1,float16,fp8,0,1.161780834197998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,1,128,1,fp8,fp8,0,1.1280608177185059
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,2,128,1,float16,fp8,0,1.096998405456543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,2,128,1,fp8,fp8,0,1.1006192207336425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,2,128,1,float16,float16,0,1.40032958984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,4,128,1,float16,float16,0,1.2672703742980957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,4,128,1,float16,fp8,0,1.203486442565918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,4,128,1,fp8,fp8,0,1.1381135940551759
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,40,128,1,float16,float16,0,0.7747360229492187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,8,128,1,float16,fp8,0,1.1108431816101074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,8,128,1,float16,float16,0,1.3281968116760254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,40,8,128,1,fp8,fp8,0,1.1015199661254882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,40,128,1,fp8,fp8,0,0.6135968208312989
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,40,128,1,float16,fp8,0,0.6840320110321045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,1,128,1,float16,float16,0,0.6425280094146728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,1,128,1,float16,fp8,0,0.6453343868255615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,1,128,1,fp8,fp8,0,0.6158095836639405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,2,128,1,float16,fp8,0,0.5601247787475586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,2,128,1,fp8,fp8,0,0.60098237991333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,4,128,1,float16,float16,0,0.6466815948486329
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,40,8,128,1,float16,float16,0,3.088699150085449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,4,128,1,float16,fp8,0,0.6009712219238281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,4,128,1,fp8,fp8,0,0.5931503772735596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,8,128,1,float16,float16,0,0.642248010635376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,8,128,1,float16,fp8,0,0.5864895820617676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,2,128,1,float16,float16,0,0.6418735980987549
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,40,8,128,1,fp8,fp8,0,0.5846735954284668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,1,128,1,fp8,fp8,0,8.316267395019532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,2,128,1,float16,fp8,0,8.299393463134766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,1,128,1,float16,float16,0,10.330372619628907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,2,128,1,float16,float16,0,10.452031707763672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,2,128,1,fp8,fp8,0,8.332083129882813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,1,128,1,float16,fp8,0,8.401924896240235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,4,128,1,float16,float16,0,10.6328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,4,128,1,float16,fp8,0,8.256520080566407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,40,128,1,float16,fp8,0,4.534654235839843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,4,128,1,fp8,fp8,0,8.584617614746094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,40,128,1,float16,float16,0,5.835822296142578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,8,128,1,float16,fp8,0,8.507361602783202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,8,128,1,fp8,fp8,0,8.422614288330077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,40,8,128,1,float16,float16,0,10.851387023925781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,40,128,1,fp8,fp8,0,4.633041763305664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,1,128,1,float16,fp8,0,4.162819290161133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,1,128,1,float16,float16,0,5.007673645019532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,1,128,1,fp8,fp8,0,4.217270278930664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,2,128,1,float16,float16,0,5.147200012207032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,2,128,1,float16,fp8,0,4.133886337280273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,2,128,1,fp8,fp8,0,4.117316818237304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,4,128,1,float16,float16,0,5.168296051025391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,4,128,1,float16,fp8,0,4.167975997924804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,4,128,1,fp8,fp8,0,4.202811050415039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,40,128,1,float16,float16,0,2.766543960571289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,40,128,1,float16,fp8,0,2.2373952865600586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,8,128,1,float16,fp8,0,4.34760627746582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,8,128,1,float16,float16,0,5.195900726318359
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,40,8,128,1,fp8,fp8,0,4.272454452514649
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,40,128,1,fp8,fp8,0,2.3125328063964843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,1,128,1,float16,fp8,0,2.070361518859863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,1,128,1,float16,float16,0,2.3415599822998048
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,2,128,1,float16,fp8,0,2.084547233581543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,1,128,1,fp8,fp8,0,2.161030387878418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,2,128,1,float16,float16,0,2.4582895278930663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,2,128,1,fp8,fp8,0,2.152176094055176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,4,128,1,float16,float16,0,2.540551948547363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,40,128,1,float16,float16,0,1.486417579650879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,4,128,1,float16,fp8,0,2.0787679672241213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,4,128,1,fp8,fp8,0,2.0907936096191406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,40,128,1,float16,fp8,0,1.132595157623291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,8,128,1,float16,fp8,0,2.116908836364746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,8,128,1,float16,float16,0,2.633419227600098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,40,128,1,fp8,fp8,0,1.2491536140441895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,1,128,1,float16,fp8,0,1.0357343673706054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,1,128,1,fp8,fp8,0,1.0493807792663574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,2,128,1,float16,float16,0,1.170924758911133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,1,128,1,float16,float16,0,1.2941184043884277
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,2,128,1,float16,fp8,0,1.0588159561157227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,2,128,1,fp8,fp8,0,1.1834431648254395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,4,128,1,float16,fp8,0,1.0383487701416017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,4,128,1,fp8,fp8,0,1.0388223648071289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,40,128,1,float16,float16,0,0.6880799770355225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,8,128,1,fp8,fp8,0,1.066539192199707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,40,128,1,fp8,fp8,0,0.5690688133239746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,40,128,1,float16,fp8,0,0.5698016166687012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,1,128,1,float16,float16,0,0.6035568237304687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,1,128,1,float16,fp8,0,0.5259039878845215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,1,128,1,fp8,fp8,0,0.513366413116455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,2,128,1,float16,float16,0,0.5900207996368408
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,2,128,1,float16,fp8,0,0.5437119960784912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,2,128,1,fp8,fp8,0,0.5241184234619141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,4,128,1,float16,float16,0,0.6010128021240234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,4,128,1,float16,fp8,0,0.5312320232391358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,4,128,1,fp8,fp8,0,0.5470655918121338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,8,128,1,float16,float16,0,0.6083280086517334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,8,128,1,float16,fp8,0,0.5507840156555176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,40,128,1,float16,float16,0,0.34702720642089846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,40,8,128,1,fp8,fp8,0,0.5303455829620362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,40,128,1,float16,fp8,0,0.29470560550689695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,40,128,1,fp8,fp8,0,0.3020911931991577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,1,128,1,float16,float16,0,0.30289440155029296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,1,128,1,fp8,fp8,0,0.26434400081634524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,2,128,1,float16,float16,0,0.3094415903091431
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,2,128,1,float16,fp8,0,0.26332640647888184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,2,128,1,fp8,fp8,0,0.26482720375061036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,4,128,1,float16,float16,0,1.2085087776184082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,4,128,1,float16,fp8,0,0.2650415897369385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,8,128,1,float16,fp8,0,1.1641488075256348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,40,8,128,1,float16,float16,0,1.2729200363159179
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,8,128,1,float16,float16,0,0.3094271898269653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,8,128,1,float16,fp8,0,0.2668767929077148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,8,128,1,fp8,fp8,0,0.2683840036392212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,40,8,128,1,fp8,fp8,0,2.0975200653076174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,1,128,1,float16,fp8,0,0.2626431941986084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,4,128,1,float16,float16,0,0.3093552112579346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,1,128,1,float16,fp8,0,4.85380973815918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,1,128,1,fp8,fp8,0,4.986884689331054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,1,128,1,float16,float16,0,5.881195068359375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,2,128,1,float16,float16,0,6.033158493041992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,40,4,128,1,fp8,fp8,0,0.26498239040374755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,2,128,1,float16,fp8,0,4.838991928100586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,2,128,1,fp8,fp8,0,4.851697540283203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,4,128,1,float16,fp8,0,4.992587280273438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,4,128,1,fp8,fp8,0,4.942961502075195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,4,128,1,float16,float16,0,6.157128143310547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,40,128,1,float16,float16,0,3.458225631713867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,40,128,1,float16,fp8,0,2.7088111877441405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,8,128,1,float16,fp8,0,4.990060806274414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,8,128,1,fp8,fp8,0,4.97639045715332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,40,128,1,fp8,fp8,0,2.7381616592407227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,40,8,128,1,float16,float16,0,6.223734283447266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,1,128,1,float16,fp8,0,2.5045055389404296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,1,128,1,float16,float16,0,2.8883920669555665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,1,128,1,fp8,fp8,0,2.5483680725097657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,2,128,1,float16,fp8,0,2.442462348937988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,2,128,1,float16,float16,0,2.9140975952148436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,2,128,1,fp8,fp8,0,2.4156095504760744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,4,128,1,float16,fp8,0,2.531328010559082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,4,128,1,fp8,fp8,0,2.448393630981445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,4,128,1,float16,float16,0,3.1960416793823243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,40,128,1,float16,fp8,0,1.361520004272461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,40,128,1,float16,float16,0,1.734231948852539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,8,128,1,float16,fp8,0,2.509480094909668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,8,128,1,fp8,fp8,0,2.4810991287231445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,40,128,1,fp8,fp8,0,1.368883228302002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,1,128,1,float16,float16,0,1.4048687934875488
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,1,128,1,float16,fp8,0,1.3822992324829102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,1,128,1,fp8,fp8,0,1.21038875579834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,2,128,1,float16,float16,0,1.5145551681518554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,2,128,1,float16,fp8,0,1.2425536155700683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,2,128,1,fp8,fp8,0,1.2160719871520995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,4,128,1,float16,float16,0,1.4080368041992188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,4,128,1,float16,fp8,0,1.3436400413513183
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,4,128,1,fp8,fp8,0,1.273366355895996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,8,128,1,float16,float16,0,1.4472944259643554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,40,128,1,float16,float16,0,0.8315808296203613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,40,128,1,fp8,fp8,0,0.7154191970825196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,40,128,1,float16,fp8,0,0.7366112232208252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,8,128,1,float16,fp8,0,1.3504544258117677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,40,8,128,1,fp8,fp8,0,1.2383343696594238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,1,128,1,float16,fp8,0,0.6959072113037109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,40,8,128,1,float16,float16,0,3.0340927124023436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,1,128,1,fp8,fp8,0,0.663918399810791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,2,128,1,float16,fp8,0,0.6307295799255371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,2,128,1,float16,float16,0,0.7062895774841309
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,2,128,1,fp8,fp8,0,0.6301871776580811
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,4,128,1,float16,float16,0,0.7051487922668457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,4,128,1,float16,fp8,0,0.6643936157226562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,4,128,1,fp8,fp8,0,0.6172751903533935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,40,128,1,float16,float16,0,0.41939358711242675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,40,128,1,float16,fp8,0,0.3783519983291626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,8,128,1,float16,fp8,0,0.6353744029998779
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,8,128,1,fp8,fp8,0,0.6493616104125977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,40,128,1,fp8,fp8,0,0.358353590965271
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,1,128,1,float16,float16,0,0.35182719230651854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,1,128,1,float16,fp8,0,0.31436159610748293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,1,128,1,fp8,fp8,0,0.3446448087692261
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,2,128,1,float16,float16,0,0.35514719486236573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,2,128,1,fp8,fp8,0,0.32175679206848146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,4,128,1,float16,float16,0,0.3567456007003784
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,4,128,1,float16,fp8,0,0.3179791927337646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,4,128,1,fp8,fp8,0,0.3302016019821167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,8,128,1,float16,float16,0,0.3622416019439697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,8,128,1,float16,fp8,0,0.32523679733276367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,40,128,1,float16,float16,0,0.2194607973098755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,40,128,1,float16,fp8,0,0.18810720443725587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,40,128,1,fp8,fp8,0,0.17934080362319946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,1,128,1,float16,float16,0,0.18296480178833008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,1,128,1,float16,fp8,0,0.16192159652709961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,2,128,1,float16,float16,0,0.1840656042098999
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,2,128,1,float16,fp8,0,0.1620959997177124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,2,128,1,fp8,fp8,0,0.16267839670181275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,4,128,1,float16,float16,0,0.18531999588012696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,4,128,1,float16,fp8,0,0.16353440284729004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,4,128,1,fp8,fp8,0,0.16404639482498168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,8,128,1,float16,float16,0,0.1876528024673462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,8,128,1,float16,fp8,0,0.16484639644622803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,1,128,1,float16,float16,0,0.7041359901428222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,8,128,1,fp8,fp8,0,0.16424800157546998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,40,8,128,1,float16,float16,0,0.7324927806854248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,2,128,1,float16,fp8,0,0.31574881076812744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,40,8,128,1,fp8,fp8,0,0.3226799964904785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,40,1,128,1,fp8,fp8,0,0.16415040493011473
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,1,128,1,float16,fp8,0,4.696464157104492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,1,128,1,fp8,fp8,0,4.706478500366211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,1,128,1,float16,float16,0,5.756407928466797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,2,128,1,float16,float16,0,5.776353454589843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,2,128,1,float16,fp8,0,4.760796737670899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,2,128,1,fp8,fp8,0,4.721993637084961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,4,128,1,float16,fp8,0,4.804816055297851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,4,128,1,float16,float16,0,5.604286575317383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,4,128,1,fp8,fp8,0,4.787475204467773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,40,128,1,float16,float16,0,3.4820175170898438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,40,128,1,float16,fp8,0,2.7600191116333006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,8,128,1,float16,fp8,0,4.951216125488282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,40,128,1,fp8,fp8,0,2.7752336502075194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,8,128,1,float16,float16,0,6.084257507324219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,40,8,128,1,fp8,fp8,0,4.911211013793945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,1,128,1,float16,float16,0,2.801315116882324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,1,128,1,fp8,fp8,0,2.3545087814331054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,1,128,1,float16,fp8,0,2.427223968505859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,2,128,1,float16,fp8,0,2.3673887252807617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,2,128,1,float16,float16,0,2.811939239501953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,2,128,1,fp8,fp8,0,2.4159040451049805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,4,128,1,float16,fp8,0,2.502732849121094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,4,128,1,float16,float16,0,2.8069807052612306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,4,128,1,fp8,fp8,0,2.3889984130859374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,40,128,1,float16,fp8,0,1.379195213317871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,8,128,1,float16,float16,0,2.934921646118164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,40,128,1,fp8,fp8,0,1.4476320266723632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,8,128,1,float16,fp8,0,2.4575807571411135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,40,8,128,1,fp8,fp8,0,2.5111215591430662
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,1,128,1,float16,float16,0,1.3414095878601073
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,1,128,1,float16,fp8,0,1.1814016342163085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,1,128,1,fp8,fp8,0,1.2938447952270509
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,2,128,1,float16,float16,0,1.349950408935547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,2,128,1,float16,fp8,0,1.2050607681274415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,2,128,1,fp8,fp8,0,1.1899888038635253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,4,128,1,float16,float16,0,1.453606414794922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,4,128,1,float16,fp8,0,1.2031840324401855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,4,128,1,fp8,fp8,0,1.2292911529541015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,40,128,1,float16,float16,0,0.8258591651916504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,8,128,1,float16,fp8,0,1.3040399551391602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,8,128,1,fp8,fp8,0,1.2165632247924805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,40,128,1,float16,fp8,0,0.7480303764343261
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,8,128,1,float16,float16,0,1.3820639610290528
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,40,128,1,fp8,fp8,0,0.6951007843017578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,1,128,1,float16,float16,0,0.6788976192474365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,1,128,1,float16,fp8,0,0.5965663909912109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,1,128,1,fp8,fp8,0,0.6066336154937744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,2,128,1,float16,float16,0,0.6757328033447265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,2,128,1,fp8,fp8,0,0.6143887996673584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,4,128,1,float16,float16,0,0.681112003326416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,4,128,1,float16,fp8,0,0.6350031852722168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,4,128,1,fp8,fp8,0,0.6071792125701905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,8,128,1,float16,float16,0,0.7019807815551757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,8,128,1,float16,fp8,0,0.6254384040832519
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,8,128,1,fp8,fp8,0,0.6294256210327148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,40,128,1,float16,fp8,0,0.35979199409484863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,40,128,1,fp8,fp8,0,0.3563472032546997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,1,128,1,float16,fp8,0,0.3071568012237549
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,1,128,1,fp8,fp8,0,0.30584158897399905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,2,128,1,float16,float16,0,0.3465456008911133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,2,128,1,float16,fp8,0,0.30580639839172363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,2,128,1,fp8,fp8,0,0.3084800004959106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,4,128,1,float16,fp8,0,0.30998079776763915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,4,128,1,float16,float16,0,0.34881279468536375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,4,128,1,fp8,fp8,0,0.3096447944641113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,8,128,1,float16,float16,0,0.35689918994903563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,8,128,1,float16,fp8,0,0.312007999420166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,40,128,1,float16,float16,0,0.21844959259033203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,40,128,1,float16,fp8,0,0.18536479473114015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,1,128,1,float16,float16,0,0.1768288016319275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,40,128,1,fp8,fp8,0,0.18351680040359497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,1,128,1,float16,fp8,0,0.1573791980743408
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,1,128,1,fp8,fp8,0,0.15748319625854493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,2,128,1,float16,fp8,0,0.1563599944114685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,2,128,1,fp8,fp8,0,0.15845760107040405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,4,128,1,float16,float16,0,0.1792207956314087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,40,40,128,1,float16,float16,0,1.6951967239379884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,4,128,1,fp8,fp8,0,0.1579327940940857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,40,2,128,1,float16,fp8,0,0.626251220703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,8,128,1,float16,fp8,0,0.16054079532623292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,8,128,1,float16,float16,0,0.18347200155258178
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,8,128,1,fp8,fp8,0,0.16166080236434938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,40,128,1,float16,fp8,0,0.10077279806137085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,40,128,1,fp8,fp8,0,0.10176320075988769
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,40,128,1,float16,float16,0,0.417903995513916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,1,128,1,float16,fp8,0,0.08223040103912353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,1,128,1,fp8,fp8,0,0.08179519772529602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,2,128,1,float16,float16,0,0.0930079996585846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,1,128,1,float16,float16,0,0.3491312026977539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,2,128,1,float16,fp8,0,0.08206560015678406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,4,128,1,float16,float16,0,0.09558240175247193
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,4,128,1,float16,fp8,0,0.08255360126495362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,4,128,1,fp8,fp8,0,0.08252000212669372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,8,128,1,float16,float16,0,0.09777600169181824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,8,128,1,float16,fp8,0,0.08405759930610657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,8,128,1,fp8,fp8,0,0.0841808021068573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,40,8,128,1,fp8,fp8,0,0.3131295919418335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,2,128,1,float16,float16,0,0.17753440141677856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,40,4,128,1,float16,fp8,0,0.15983359813690184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,40,128,1,float16,float16,0,0.12146559953689576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,1,128,1,float16,float16,0,0.09202880263328553
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,40,2,128,1,fp8,fp8,0,0.08209599852561951
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,1,128,1,float16,fp8,0,2.899283218383789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,1,128,1,float16,float16,0,3.266267013549805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,1,128,1,fp8,fp8,0,2.898956871032715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,2,128,1,float16,float16,0,3.3119808197021485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,2,128,1,float16,fp8,0,2.8856832504272463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,2,128,1,fp8,fp8,0,2.9156944274902346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,4,128,1,float16,float16,0,3.5059566497802734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,4,128,1,fp8,fp8,0,2.9610048294067384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,4,128,1,float16,fp8,0,2.944691276550293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,8,128,1,float16,fp8,0,3.019598388671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,8,128,1,float16,float16,0,3.6112831115722654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,40,128,1,float16,float16,0,2.092865562438965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,40,128,1,float16,fp8,0,1.7412191390991212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,40,8,128,1,fp8,fp8,0,3.082689666748047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,1,128,1,float16,float16,0,1.6457023620605469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,40,128,1,fp8,fp8,0,1.73863525390625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,1,128,1,float16,fp8,0,1.4458319664001464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,1,128,1,fp8,fp8,0,1.5106304168701172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,2,128,1,float16,float16,0,1.7110576629638672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,2,128,1,float16,fp8,0,1.4517711639404296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,2,128,1,fp8,fp8,0,1.4696991920471192
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,4,128,1,float16,float16,0,1.7153295516967773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,4,128,1,float16,fp8,0,1.4694064140319825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,4,128,1,fp8,fp8,0,1.4692447662353516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,8,128,1,float16,float16,0,1.7190816879272461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,40,128,1,float16,float16,0,1.060108757019043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,8,128,1,fp8,fp8,0,1.5088335990905761
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,40,8,128,1,float16,fp8,0,1.5637727737426759
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,1,128,1,float16,float16,0,0.8099583625793457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,40,128,1,fp8,fp8,0,0.8776047706604004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,1,128,1,float16,fp8,0,0.7699920177459717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,2,128,1,float16,float16,0,0.8203439712524414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,2,128,1,float16,fp8,0,0.743887996673584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,2,128,1,fp8,fp8,0,0.7553552150726318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,4,128,1,float16,float16,0,0.8240943908691406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,4,128,1,float16,fp8,0,0.7622111797332763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,4,128,1,fp8,fp8,0,0.7665487766265869
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,8,128,1,float16,float16,0,0.8560591697692871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,8,128,1,float16,fp8,0,0.7691664218902587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,40,128,1,float16,float16,0,0.5317728042602539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,8,128,1,fp8,fp8,0,0.7568016052246094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,40,128,1,float16,fp8,0,0.45609440803527834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,1,128,1,float16,fp8,0,0.370961594581604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,1,128,1,fp8,fp8,0,0.3687295913696289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,2,128,1,float16,float16,0,0.41951842308044435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,2,128,1,float16,fp8,0,0.3725600004196167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,2,128,1,fp8,fp8,0,0.371126389503479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,4,128,1,float16,float16,0,0.42226080894470214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,4,128,1,float16,fp8,0,0.3794944047927856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,4,128,1,fp8,fp8,0,0.37696640491485595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,40,128,1,float16,fp8,0,0.9439743995666504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,8,128,1,float16,float16,0,0.43215041160583495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,8,128,1,fp8,fp8,0,0.38761119842529296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,40,128,1,float16,float16,0,0.27119998931884765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,40,128,1,float16,fp8,0,0.2333375930786133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,40,128,1,fp8,fp8,0,0.2373055934906006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,40,1,128,1,fp8,fp8,0,0.7808623790740967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,1,128,1,float16,float16,0,0.2131727933883667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,1,128,1,float16,fp8,0,0.19046560525894166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,1,128,1,fp8,fp8,0,0.191921603679657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,2,128,1,float16,float16,0,0.21840479373931884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,2,128,1,float16,fp8,0,0.19136639833450317
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,4,128,1,float16,float16,0,0.21644001007080077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,4,128,1,float16,fp8,0,0.19696639776229857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,8,128,1,float16,float16,0,0.2222304105758667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,8,128,1,float16,fp8,0,0.19906560182571412
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,8,128,1,fp8,fp8,0,0.20163040161132811
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,40,128,1,float16,fp8,0,0.12421760559082032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,40,128,1,fp8,fp8,0,0.12467199563980103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,1,128,1,float16,float16,0,0.1120959997177124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,1,128,1,float16,fp8,0,0.1001520037651062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,1,128,1,fp8,fp8,0,0.10031520128250122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,2,128,1,float16,float16,0,0.1129472017288208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,1,128,1,float16,float16,0,0.4141088008880615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,2,128,1,float16,fp8,0,0.10094239711761474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,2,128,1,fp8,fp8,0,0.10041600465774536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,4,128,1,float16,float16,0,0.11433759927749634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,4,128,1,float16,fp8,0,0.10254559516906739
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,4,128,1,fp8,fp8,0,0.10262080430984497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,8,128,1,float16,float16,0,0.1174239993095398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,8,128,1,float16,fp8,0,0.10409599542617798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,8,128,1,fp8,fp8,0,0.10488320589065551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,40,128,1,float16,float16,0,0.08195840120315552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,40,128,1,float16,fp8,0,0.07000640034675598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,1,128,1,float16,float16,0,0.05883520245552063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,1,128,1,float16,fp8,0,0.05371519923210144
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,1,128,1,fp8,fp8,0,0.054360002279281616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,2,128,1,float16,float16,0,0.05839840173721313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,2,128,1,float16,fp8,0,0.05437759757041931
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,2,128,1,fp8,fp8,0,0.0535968005657196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,4,128,1,float16,float16,0,0.06007680296897888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,4,128,1,float16,fp8,0,0.05413119792938233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,8,128,1,float16,float16,0,0.06244639754295349
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,8,128,1,float16,fp8,0,0.387774395942688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,8,128,1,float16,fp8,0,0.054953598976135255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,8,128,1,fp8,fp8,0,0.05541759729385376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,2,128,1,fp8,fp8,0,0.19262399673461914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,40,4,128,1,fp8,fp8,0,0.19577280282974244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,40,40,128,1,float16,float16,0,0.14425920248031615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,40,40,128,1,fp8,fp8,0,0.45148801803588867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,40,128,1,fp8,fp8,0,0.07025439739227295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,40,4,128,1,fp8,fp8,0,0.05469599962234497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,1,128,1,float16,fp8,0,3.022489547729492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,1,128,1,float16,float16,0,3.394015884399414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,1,128,1,fp8,fp8,0,3.0660032272338866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,2,128,1,float16,fp8,0,3.0594127655029295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,2,128,1,float16,float16,0,3.454412841796875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,2,128,1,fp8,fp8,0,3.0485727310180666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,4,128,1,float16,float16,0,3.518231964111328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,4,128,1,float16,fp8,0,3.0852432250976562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,4,128,1,fp8,fp8,0,3.082012748718262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,8,128,1,float16,fp8,0,3.196966361999512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,8,128,1,float16,float16,0,3.6796321868896484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,40,128,1,float16,float16,0,2.321623992919922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,40,8,128,1,fp8,fp8,0,3.1632400512695313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,1,128,1,float16,float16,0,1.664259147644043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,40,128,1,fp8,fp8,0,1.9188735961914063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,1,128,1,float16,fp8,0,1.5251232147216798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,1,128,1,fp8,fp8,0,1.5879216194152832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,2,128,1,float16,fp8,0,1.5377936363220215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,2,128,1,float16,float16,0,1.7164352416992188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,2,128,1,fp8,fp8,0,1.5719216346740723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,4,128,1,float16,fp8,0,1.5527872085571288
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,4,128,1,float16,float16,0,1.706657600402832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,4,128,1,fp8,fp8,0,1.5734928131103516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,8,128,1,float16,float16,0,1.797755241394043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,8,128,1,float16,fp8,0,1.6508848190307617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,40,128,1,float16,fp8,0,1.0098143577575684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,40,128,1,float16,float16,0,1.1536208152770997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,40,128,1,fp8,fp8,0,0.9744735717773437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,8,128,1,fp8,fp8,0,1.6026975631713867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,40,40,128,1,float16,fp8,0,1.9768400192260742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,1,128,1,float16,fp8,0,0.773964786529541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,1,128,1,fp8,fp8,0,0.7901616096496582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,2,128,1,float16,fp8,0,0.7748383998870849
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,2,128,1,float16,float16,0,0.855624008178711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,2,128,1,fp8,fp8,0,0.7729104042053223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,4,128,1,float16,float16,0,0.8619487762451172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,4,128,1,float16,fp8,0,0.7871647834777832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,4,128,1,fp8,fp8,0,0.7885407924652099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,8,128,1,float16,float16,0,0.8959551811218261
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,8,128,1,float16,fp8,0,0.8108976364135743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,40,128,1,float16,fp8,0,0.49499359130859377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,8,128,1,fp8,fp8,0,0.8208383560180664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,1,128,1,float16,float16,0,0.42648801803588865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,40,128,1,fp8,fp8,0,0.4943376064300537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,1,128,1,float16,fp8,0,0.39098401069641114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,1,128,1,fp8,fp8,0,0.391427206993103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,2,128,1,float16,float16,0,0.43030557632446287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,2,128,1,float16,fp8,0,0.39300639629364015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,2,128,1,fp8,fp8,0,0.393337607383728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,4,128,1,float16,float16,0,0.4359839916229248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,4,128,1,float16,fp8,0,0.40094242095947263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,4,128,1,fp8,fp8,0,0.4005440235137939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,8,128,1,float16,fp8,0,0.4115488052368164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,8,128,1,fp8,fp8,0,0.41162400245666503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,40,128,1,float16,float16,0,0.3036720037460327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,40,128,1,float16,fp8,0,0.2559999942779541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,1,128,1,float16,float16,0,0.2197472095489502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,1,128,1,float16,fp8,0,0.20176959037780762
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,1,128,1,fp8,fp8,0,0.2015712022781372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,2,128,1,float16,float16,0,0.22145919799804686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,2,128,1,float16,fp8,0,0.20270559787750245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,2,128,1,fp8,fp8,0,0.2029968023300171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,4,128,1,float16,float16,0,0.22511680126190187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,4,128,1,float16,fp8,0,0.20659201145172118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,4,128,1,fp8,fp8,0,0.2063647985458374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,8,128,1,float16,float16,0,0.23350241184234619
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,8,128,1,float16,fp8,0,0.21228160858154296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,8,128,1,fp8,fp8,0,0.21256799697875978
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,40,128,1,float16,float16,0,0.16032960414886474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,40,128,1,float16,fp8,0,0.1368720054626465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,1,128,1,float16,float16,0,0.11692320108413697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,1,128,1,float16,fp8,0,0.10676480531692505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,1,128,1,fp8,fp8,0,0.10736639499664306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,2,128,1,float16,float16,0,0.11768319606781005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,2,128,1,fp8,fp8,0,0.10778720378875732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,4,128,1,float16,float16,0,0.11978240013122558
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,4,128,1,float16,fp8,0,0.10983200073242187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,4,128,1,fp8,fp8,0,0.1098207950592041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,8,128,1,float16,float16,0,0.1240015983581543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,40,128,1,float16,float16,0,0.5893343925476074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,8,128,1,fp8,fp8,0,0.1130784034729004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,40,128,1,float16,fp8,0,0.07727839946746826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,40,128,1,fp8,fp8,0,0.07791200280189514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,1,128,1,float16,float16,0,0.06277279853820801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,1,128,1,float16,fp8,0,0.057529598474502563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,1,128,1,fp8,fp8,0,0.057734400033950806
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,40,8,128,1,float16,float16,0,0.45395679473876954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,2,128,1,float16,float16,0,0.06401600241661072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,2,128,1,fp8,fp8,0,0.057903999090194704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,40,40,128,1,fp8,fp8,0,0.2556063890457153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,4,128,1,float16,float16,0,0.06595360040664673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,4,128,1,float16,fp8,0,0.05820639729499817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,4,128,1,fp8,fp8,0,0.057740801572799684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,8,128,1,float16,float16,0,0.06832960247993469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,8,128,1,float16,fp8,0,0.06003040075302124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,8,128,1,fp8,fp8,0,0.06022080183029175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,40,128,1,float16,float16,0,0.05278559923171997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,40,128,1,float16,fp8,0,0.043838399648666385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,40,1,128,1,float16,float16,0,0.8812687873840332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,40,128,1,fp8,fp8,0,0.04421440064907074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,1,128,1,float16,float16,0,0.03620159924030304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,1,128,1,fp8,fp8,0,0.03400799930095673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,1,128,1,float16,fp8,0,0.03386560082435608
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,40,128,1,fp8,fp8,0,0.13647359609603882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,2,128,1,fp8,fp8,0,0.03402880132198334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,4,128,1,float16,float16,0,0.03656319975852966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,2,128,1,float16,fp8,0,0.10759040117263793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,4,128,1,float16,fp8,0,0.034462401270866395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,4,128,1,fp8,fp8,0,0.03454880118370056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,8,128,1,float16,float16,0,0.036980798840522765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,8,128,1,float16,fp8,0,0.03474720120429993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,8,128,1,fp8,fp8,0,0.03463680148124695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,40,8,128,1,float16,fp8,0,0.11279200315475464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,40,128,1,float16,float16,0,0.08796319961547852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,40,2,128,1,float16,fp8,0,0.05719839930534363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,1,128,1,float16,fp8,0,2.241041564941406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,1,128,1,fp8,fp8,0,2.2705087661743164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,2,128,1,float16,float16,0,0.036348798871040346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,40,2,128,1,float16,fp8,0,0.033904001116752625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,2,128,1,float16,float16,0,2.3570079803466797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,2,128,1,float16,fp8,0,2.250223922729492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,2,128,1,fp8,fp8,0,2.2482431411743162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,4,128,1,float16,fp8,0,2.308651161193848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,4,128,1,float16,float16,0,2.4373968124389647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,4,128,1,fp8,fp8,0,2.296705627441406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,8,128,1,float16,float16,0,2.595475196838379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,1,128,1,float16,float16,0,2.3519439697265625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,8,128,1,float16,fp8,0,2.4067264556884767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,40,128,1,float16,fp8,0,1.5598352432250977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,1,128,1,float16,float16,0,1.192627239227295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,1,128,1,float16,fp8,0,1.1347519874572753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,40,128,1,float16,float16,0,1.8083856582641602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,40,8,128,1,fp8,fp8,0,2.4042144775390626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,1,128,1,fp8,fp8,0,1.1249695777893067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,2,128,1,float16,fp8,0,1.1298255920410156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,2,128,1,fp8,fp8,0,1.1487119674682618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,4,128,1,float16,float16,0,1.2246128082275392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,4,128,1,float16,fp8,0,1.1550895690917968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,4,128,1,fp8,fp8,0,1.1506591796875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,8,128,1,float16,fp8,0,1.2077280044555665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,40,128,1,float16,float16,0,0.9088144302368164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,8,128,1,float16,float16,0,1.2837136268615723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,8,128,1,fp8,fp8,0,1.223419189453125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,40,128,1,fp8,fp8,0,0.78089599609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,1,128,1,float16,float16,0,0.5933856010437012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,1,128,1,fp8,fp8,0,0.5691440105438232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,40,128,1,fp8,fp8,0,1.5522064208984374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,2,128,1,float16,float16,0,0.6036831855773925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,2,128,1,float16,fp8,0,0.5691936016082764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,2,128,1,fp8,fp8,0,0.5725056171417237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,4,128,1,float16,float16,0,0.6141312122344971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,4,128,1,fp8,fp8,0,0.588808012008667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,4,128,1,float16,fp8,0,0.5843776226043701
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,40,2,128,1,float16,float16,0,1.1888256072998047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,8,128,1,float16,fp8,0,0.6101823806762695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,8,128,1,fp8,fp8,0,0.6093952178955078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,40,128,1,float16,float16,0,0.46416001319885253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,40,128,1,float16,fp8,0,0.39999520778656006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,1,128,1,float16,float16,0,0.30295519828796386
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,40,128,1,fp8,fp8,0,0.4017216205596924
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,1,128,1,float16,fp8,0,0.2914463996887207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,1,128,1,fp8,fp8,0,0.2932447910308838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,2,128,1,float16,float16,0,0.30684480667114256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,2,128,1,float16,fp8,0,0.2917248010635376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,2,128,1,fp8,fp8,0,0.2945327997207642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,4,128,1,float16,float16,0,0.3151599884033203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,4,128,1,fp8,fp8,0,0.29906721115112306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,8,128,1,float16,float16,0,0.3315632104873657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,40,128,1,float16,fp8,0,0.7838640213012695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,8,128,1,float16,fp8,0,0.31412320137023925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,8,128,1,fp8,fp8,0,0.3119600057601929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,40,128,1,float16,float16,0,0.23922080993652345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,40,128,1,float16,fp8,0,0.20701279640197753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,40,128,1,fp8,fp8,0,0.2079632043838501
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,1,128,1,float16,fp8,0,0.15166560411453248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,1,128,1,fp8,fp8,0,0.15178719758987427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,2,128,1,float16,float16,0,0.16011199951171876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,2,128,1,float16,fp8,0,0.1525760054588318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,1,128,1,float16,fp8,0,0.5745952129364014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,4,128,1,float16,float16,0,0.16419999599456786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,4,128,1,float16,fp8,0,0.15542080402374267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,4,128,1,fp8,fp8,0,0.15578399896621703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,8,128,1,float16,float16,0,0.17355359792709352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,8,128,1,float16,fp8,0,0.1624575972557068
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,8,128,1,fp8,fp8,0,0.16220320463180543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,40,128,1,float16,float16,0,0.12772799730300904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,40,128,1,float16,fp8,0,0.11197279691696167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,1,128,1,float16,fp8,0,0.08228800296783448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,40,128,1,fp8,fp8,0,0.1115839958190918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,1,128,1,fp8,fp8,0,0.08210880160331727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,2,128,1,float16,float16,0,0.08728320002555848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,2,128,1,float16,fp8,0,0.0827023983001709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,2,128,1,fp8,fp8,0,0.08292319774627685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,4,128,1,float16,float16,0,0.08953760266304016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,4,128,1,float16,fp8,0,0.08409439921379089
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,4,128,1,fp8,fp8,0,0.0841808021068573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,40,8,128,1,float16,float16,0,0.6478079795837403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,8,128,1,float16,fp8,0,0.08740640282630921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,8,128,1,fp8,fp8,0,0.08742880225181579
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,40,128,1,float16,fp8,0,0.06335840225219727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,40,128,1,fp8,fp8,0,0.06317920088768006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,1,128,1,float16,float16,0,0.04755519926548004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,1,128,1,float16,fp8,0,0.04403519928455353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,1,128,1,fp8,fp8,0,0.044017601013183597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,2,128,1,float16,float16,0,0.04848000109195709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,2,128,1,float16,fp8,0,0.044414401054382324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,2,128,1,fp8,fp8,0,0.044675201177597046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,4,128,1,float16,float16,0,0.05034400224685669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,40,4,128,1,float16,fp8,0,0.30092320442199705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,4,128,1,float16,fp8,0,0.04506880044937134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,4,128,1,fp8,fp8,0,0.04514879882335663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,8,128,1,float16,fp8,0,0.047270399332046506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,8,128,1,fp8,fp8,0,0.04720639884471893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,40,128,1,float16,float16,0,0.04273920059204102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,1,128,1,float16,float16,0,0.15874240398406983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,40,128,1,float16,fp8,0,0.03564319908618927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,40,128,1,fp8,fp8,0,0.035780799388885495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,1,128,1,float16,fp8,0,0.027532801032066345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,40,2,128,1,fp8,fp8,0,0.15218559503555298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,1,128,1,fp8,fp8,0,0.027513599395751952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,2,128,1,float16,float16,0,0.028177601099014283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,2,128,1,float16,fp8,0,0.02740960121154785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,2,128,1,fp8,fp8,0,0.027425599098205567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,4,128,1,float16,float16,0,0.028624001145362853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,4,128,1,float16,fp8,0,0.02762399911880493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,4,128,1,fp8,fp8,0,0.02747359871864319
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,8,128,1,float16,float16,0,0.02900800108909607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,8,128,1,float16,fp8,0,0.02773439884185791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,1,128,1,float16,float16,0,0.08599039912223816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,40,128,1,float16,float16,0,0.024172799289226533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,40,128,1,float16,fp8,0,0.023745599389076232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,40,128,1,fp8,fp8,0,0.023835200071334838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,1,128,1,float16,fp8,0,0.018918399512767792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,1,128,1,fp8,fp8,0,0.018937599658966065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,40,8,128,1,float16,float16,0,0.09390079975128174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,2,128,1,float16,float16,0,0.018681600689888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,2,128,1,float16,fp8,0,0.019070400297641753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,40,128,1,float16,float16,0,0.07089279890060425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,2,128,1,fp8,fp8,0,0.018987199664115904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,4,128,1,float16,float16,0,0.018854400515556334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,4,128,1,float16,fp8,0,0.01932159960269928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,8,128,1,float16,float16,0,0.019120000302791595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,8,128,1,float16,fp8,0,0.019289599359035493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,8,128,1,fp8,fp8,0,0.019494399428367615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,40,8,128,1,float16,float16,0,0.052795201539993286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,1,128,1,float16,float16,0,0.9177167892456055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,1,128,1,float16,fp8,0,0.9351200103759766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,1,128,1,float16,float16,0,0.0280784010887146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,1,128,1,fp8,fp8,0,0.9301152229309082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,2,128,1,float16,float16,0,0.9291760444641113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,2,128,1,float16,fp8,0,0.9370976448059082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,40,8,128,1,fp8,fp8,0,0.027657601237297057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,2,128,1,fp8,fp8,0,0.9320431709289551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,1,128,1,float16,float16,0,0.01839040070772171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,4,128,1,float16,float16,0,0.9620880126953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,4,128,1,float16,fp8,0,0.9554368019104004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,4,128,1,fp8,fp8,0,0.9615568161010742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,40,4,128,1,fp8,fp8,0,0.0189423993229866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,8,128,1,float16,float16,0,1.0249695777893066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,8,128,1,float16,fp8,0,0.9941871643066407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,40,8,128,1,fp8,fp8,0,0.9873567581176758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,40,128,1,float16,float16,0,0.7924111843109131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,1,128,1,float16,float16,0,0.4661712169647217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,40,128,1,float16,fp8,0,0.6615200042724609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,1,128,1,float16,fp8,0,0.4708079814910889
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,40,128,1,fp8,fp8,0,0.6638000011444092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,1,128,1,fp8,fp8,0,0.47114877700805663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,2,128,1,float16,float16,0,0.4676368236541748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,2,128,1,float16,fp8,0,0.4722752094268799
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,2,128,1,fp8,fp8,0,0.4728032112121582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,4,128,1,float16,float16,0,0.4888512134552002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,4,128,1,float16,fp8,0,0.48391361236572267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,4,128,1,fp8,fp8,0,0.48820319175720217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,8,128,1,float16,float16,0,0.5216383934020996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,8,128,1,float16,fp8,0,0.5035520076751709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,40,128,1,float16,fp8,0,0.3376895904541016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,40,128,1,float16,float16,0,0.4040719985961914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,1,128,1,float16,float16,0,0.2388000011444092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,1,128,1,float16,fp8,0,0.24375839233398439
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,2,128,1,float16,float16,0,0.24249439239501952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,2,128,1,float16,fp8,0,0.24263200759887696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,2,128,1,fp8,fp8,0,0.24219999313354493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,4,128,1,float16,fp8,0,0.2507184028625488
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,4,128,1,fp8,fp8,0,0.24909279346466065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,8,128,1,float16,float16,0,0.26631200313568115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,8,128,1,float16,fp8,0,0.25856800079345704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,40,128,1,float16,float16,0,0.20932478904724122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,8,128,1,fp8,fp8,0,0.2607232093811035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,40,128,1,float16,fp8,0,0.17574720382690429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,1,128,1,float16,float16,0,0.12600640058517457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,1,128,1,float16,fp8,0,0.12774879932403566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,40,128,1,fp8,fp8,0,0.17595200538635253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,1,128,1,fp8,fp8,0,0.127019202709198
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,2,128,1,float16,float16,0,0.1277135968208313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,2,128,1,fp8,fp8,0,0.12738720178604127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,4,128,1,float16,float16,0,0.13213440179824829
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,4,128,1,float16,fp8,0,0.13044480085372925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,4,128,1,fp8,fp8,0,0.13081120252609252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,8,128,1,float16,float16,0,0.1399727940559387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,8,128,1,float16,fp8,0,0.13576480150222778
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,40,128,1,float16,float16,0,0.1115455985069275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,40,128,1,float16,fp8,0,0.09484000205993652
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,40,128,1,fp8,fp8,0,0.09485759735107421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,1,128,1,float16,float16,0,0.06911519765853882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,1,128,1,float16,fp8,0,0.0691424012184143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,1,128,1,fp8,fp8,0,0.06935200095176697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,2,128,1,float16,float16,0,0.0701856017112732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,2,128,1,float16,fp8,0,0.06992959976196289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,2,128,1,fp8,fp8,0,0.06976640224456787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,4,128,1,float16,float16,0,0.07212799787521362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,40,8,128,1,fp8,fp8,0,0.5010496139526367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,40,128,1,fp8,fp8,0,0.33814239501953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,1,128,1,fp8,fp8,0,0.24330880641937255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,4,128,1,fp8,fp8,0,0.07080320119857789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,8,128,1,float16,float16,0,0.07672640085220336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,8,128,1,float16,fp8,0,0.07419360280036927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,40,4,128,1,float16,float16,0,0.2500751972198486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,40,128,1,float16,float16,0,0.06309919953346252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,40,128,1,float16,fp8,0,0.05455039739608765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,1,128,1,float16,float16,0,0.03821280002593994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,1,128,1,float16,fp8,0,0.03802559971809387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,1,128,1,fp8,fp8,0,0.037390398979187014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,2,128,1,float16,float16,0,0.03968800008296967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,2,128,1,float16,fp8,0,0.037518399953842166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,4,128,1,float16,float16,0,0.041203200817108154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,4,128,1,float16,fp8,0,0.03820320069789886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,4,128,1,fp8,fp8,0,0.03806560039520264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,8,128,1,float16,float16,0,0.04365920126438141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,8,128,1,float16,fp8,0,0.040094399452209474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,8,128,1,fp8,fp8,0,0.03967199921607971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,8,128,1,fp8,fp8,0,0.13564800024032592
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,40,128,1,float16,float16,0,0.03747360110282898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,40,128,1,fp8,fp8,0,0.03038559854030609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,40,128,1,float16,fp8,0,0.030436798930168152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,1,128,1,float16,float16,0,0.022896000742912294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,1,128,1,float16,fp8,0,0.02414879947900772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,1,128,1,fp8,fp8,0,0.024105599522590636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,2,128,1,float16,float16,0,0.023523199558258056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,2,128,1,float16,fp8,0,0.023958399891853333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,2,128,1,fp8,fp8,0,0.02406720072031021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,4,128,1,float16,float16,0,0.023824000358581544
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,4,128,1,float16,fp8,0,0.024247999489307403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,4,128,1,fp8,fp8,0,0.024110400676727296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,8,128,1,float16,float16,0,0.02433760017156601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,8,128,1,float16,fp8,0,0.024396799504756927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,40,8,128,1,fp8,fp8,0,0.024451200664043427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,40,128,1,float16,float16,0,0.021305599808692934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,40,128,1,float16,fp8,0,0.020980800688266753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,40,128,1,fp8,fp8,0,0.02099359929561615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,1,128,1,float16,float16,0,0.01669279932975769
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,1,128,1,float16,fp8,0,0.017209599912166595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,1,128,1,fp8,fp8,0,0.017374399304389953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,2,128,1,float16,float16,0,0.016873599588871004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,2,128,1,float16,fp8,0,0.01743839979171753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,4,128,1,float16,float16,0,0.01712159961462021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,4,128,1,float16,fp8,0,0.07143999934196472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,4,128,1,float16,fp8,0,0.01754080057144165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,4,128,1,fp8,fp8,0,0.01748639941215515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,8,128,1,float16,float16,0,0.0172992005944252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,8,128,1,float16,fp8,0,0.017764799296855927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,8,128,1,fp8,fp8,0,0.01758880019187927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,40,128,1,float16,float16,0,0.015500800311565399
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,40,128,1,fp8,fp8,0,0.05433120131492615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,40,128,1,float16,fp8,0,0.015462400019168853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,40,128,1,fp8,fp8,0,0.015588800609111785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,1,128,1,float16,fp8,0,0.013928000628948212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,1,128,1,fp8,fp8,0,0.013913600146770478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,2,128,1,float16,float16,0,0.013822400569915771
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,2,128,1,float16,fp8,0,0.014176000654697419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,40,2,128,1,fp8,fp8,0,0.0383103996515274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,2,128,1,fp8,fp8,0,0.014324800670146942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,4,128,1,fp8,fp8,0,0.014476799964904785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,8,128,1,float16,fp8,0,0.014427199959754944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,8,128,1,fp8,fp8,0,0.014350399374961853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,40,2,128,1,float16,fp8,0,0.1281440019607544
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,1,128,1,float16,float16,0,0.4389967918395996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,1,128,1,float16,fp8,0,0.45952482223510743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,1,128,1,fp8,fp8,0,0.45932960510253906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,2,128,1,float16,float16,0,0.44417438507080076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,2,128,1,float16,fp8,0,0.4629695892333984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,40,2,128,1,fp8,fp8,0,0.017411200702190398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,2,128,1,fp8,fp8,0,0.46216320991516113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,4,128,1,float16,float16,0,0.4567296028137207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,40,8,128,1,fp8,fp8,0,0.07375199794769287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,4,128,1,float16,fp8,0,0.46844801902770994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,4,128,1,fp8,fp8,0,0.4724448204040527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,1,128,1,float16,float16,0,0.01369439959526062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,8,128,1,float16,float16,0,0.4862224102020264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,4,128,1,float16,float16,0,0.014068800210952758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,8,128,1,float16,float16,0,0.014374400675296783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,40,128,1,float16,fp8,0,0.33611838817596434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,8,128,1,float16,fp8,0,0.49636640548706057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,1,128,1,float16,float16,0,0.22435998916625977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,40,128,1,fp8,fp8,0,0.337443208694458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,1,128,1,float16,fp8,0,0.23393120765686035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,2,128,1,float16,float16,0,0.22773120403289795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,2,128,1,fp8,fp8,0,0.23735039234161376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,4,128,1,float16,float16,0,0.23431200981140138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,4,128,1,float16,fp8,0,0.23992960453033446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,4,128,1,fp8,fp8,0,0.2418287992477417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,8,128,1,float16,float16,0,0.24873600006103516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,8,128,1,fp8,fp8,0,0.25394880771636963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,40,128,1,float16,float16,0,0.20050399303436278
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,40,128,1,float16,fp8,0,0.1753183960914612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,40,128,1,fp8,fp8,0,0.17541279792785644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,1,128,1,float16,float16,0,0.11780799627304077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,1,128,1,float16,fp8,0,0.12279200553894043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,1,128,1,fp8,fp8,0,0.12380160093307495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,40,4,128,1,float16,fp8,0,0.01419519931077957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,2,128,1,float16,float16,0,0.11956479549407958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,40,8,128,1,fp8,fp8,0,0.49214558601379393
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,2,128,1,float16,fp8,0,0.1232975959777832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,40,128,1,float16,float16,0,0.3872992038726807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,4,128,1,float16,float16,0,0.1237712025642395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,4,128,1,fp8,fp8,0,0.1265071988105774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,8,128,1,float16,float16,0,0.13041119575500487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,2,128,1,float16,fp8,0,0.23436479568481444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,8,128,1,float16,fp8,0,0.13369760513305665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,8,128,1,fp8,fp8,0,0.13287680149078368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,40,128,1,float16,float16,0,0.10707839727401733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,40,128,1,fp8,fp8,0,0.09229599833488464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,1,128,1,float16,float16,0,0.06587839722633362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,1,128,1,float16,fp8,0,0.06607999801635742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,8,128,1,float16,fp8,0,0.2526063919067383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,2,128,1,float16,float16,0,0.06622560024261474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,2,128,1,float16,fp8,0,0.06696640253067017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,4,128,1,float16,float16,0,0.06886720061302185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,4,128,1,float16,fp8,0,0.06791679859161377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,4,128,1,fp8,fp8,0,0.06803839802742004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,8,128,1,float16,float16,0,0.07284160256385804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,8,128,1,float16,fp8,0,0.0705680012702942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,8,128,1,fp8,fp8,0,0.07089120149612427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,40,128,1,float16,float16,0,0.060361599922180174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,40,128,1,float16,fp8,0,0.049395200610160825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,40,128,1,fp8,fp8,0,0.04943200051784515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,2,128,1,fp8,fp8,0,0.12524319887161256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,1,128,1,float16,float16,0,0.034318399429321286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,1,128,1,float16,fp8,0,0.03346880078315735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,40,4,128,1,float16,fp8,0,0.12556480169296264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,1,128,1,fp8,fp8,0,0.033399999141693115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,2,128,1,float16,float16,0,0.03587999939918518
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,2,128,1,float16,fp8,0,0.033102399110794066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,2,128,1,fp8,fp8,0,0.033486399054527285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,4,128,1,float16,float16,0,0.03859840035438537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,40,1,128,1,fp8,fp8,0,0.23393759727478028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,8,128,1,float16,fp8,0,0.03538399934768677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,8,128,1,fp8,fp8,0,0.03560000061988831
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,40,128,1,float16,float16,0,0.036027199029922484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,40,128,1,float16,fp8,0,0.02703840136528015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,40,128,1,fp8,fp8,0,0.026526400446891786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,1,128,1,float16,float16,0,0.02178719937801361
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,1,128,1,float16,fp8,0,0.022390399873256684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,1,128,1,fp8,fp8,0,0.02265920042991638
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,2,128,1,float16,float16,0,0.02176000028848648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,2,128,1,float16,fp8,0,0.022545599937438966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,2,128,1,fp8,fp8,0,0.02244960069656372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,4,128,1,float16,float16,0,0.022280000150203705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,1,128,1,fp8,fp8,0,0.06679199934005738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,4,128,1,fp8,fp8,0,0.02271360009908676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,8,128,1,float16,float16,0,0.022865599393844603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,8,128,1,float16,fp8,0,0.02306559979915619
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,2,128,1,fp8,fp8,0,0.06667680144309998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,8,128,1,fp8,fp8,0,0.02294880002737045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,40,128,1,float16,float16,0,0.0203232005238533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,40,128,1,float16,fp8,0,0.018857599794864656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,40,128,1,fp8,fp8,0,0.018751999735832213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,1,128,1,float16,fp8,0,0.017504000663757326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,1,128,1,fp8,fp8,0,0.017372800409793852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,2,128,1,float16,float16,0,0.01696320027112961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,2,128,1,float16,fp8,0,0.017502400279045104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,2,128,1,fp8,fp8,0,0.017496000230312347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,4,128,1,float16,float16,0,0.017132799327373504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,4,128,1,float16,fp8,0,0.017375999689102174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,4,128,1,fp8,fp8,0,0.017441600561141968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,8,128,1,float16,float16,0,0.01727840006351471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,8,128,1,float16,fp8,0,0.017475199699401856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,8,128,1,fp8,fp8,0,0.01743679940700531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,40,128,1,float16,float16,0,0.014017599821090698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,40,128,1,float16,fp8,0,0.013502399623394012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,1,128,1,float16,float16,0,0.01234399974346161
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,1,128,1,float16,fp8,0,0.012600000202655792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,1,128,1,fp8,fp8,0,0.012563200294971466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,2,128,1,float16,float16,0,0.012356799840927125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,2,128,1,float16,fp8,0,0.012782399356365205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,2,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,4,128,1,float16,float16,0,0.012331199645996094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,4,128,1,float16,fp8,0,0.012731200456619263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,4,128,1,fp8,fp8,0,0.012745599448680877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,8,128,1,float16,float16,0,0.012591999769210816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,4,128,1,float16,fp8,0,0.03402880132198334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,8,128,1,float16,fp8,0,0.012583999335765839
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,4,128,1,fp8,fp8,0,0.033876800537109376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,40,128,1,float16,float16,0,0.013313600420951843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,40,128,1,float16,fp8,0,0.01356319934129715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,1,128,1,float16,float16,0,0.01207360029220581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,1,128,1,float16,fp8,0,0.012223999947309494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,40,40,128,1,float16,fp8,0,0.09297279715538025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,1,128,1,fp8,fp8,0,0.012223999947309494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,2,128,1,float16,float16,0,0.01202080026268959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,2,128,1,float16,fp8,0,0.012363199889659882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,2,128,1,fp8,fp8,0,0.012366399914026261
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,40,4,128,1,float16,fp8,0,0.02261119931936264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,4,128,1,float16,fp8,0,0.012486399710178375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,4,128,1,fp8,fp8,0,0.012268800288438797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,8,128,1,float16,float16,0,0.012220799922943115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,8,128,1,float16,fp8,0,0.012428800016641617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,40,1,128,1,float16,float16,0,0.01683039963245392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,8,128,1,fp8,fp8,0,0.012392000108957291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,1,128,1,float16,float16,0,0.2240976095199585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,1,128,1,float16,fp8,0,0.23225278854370118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,1,128,1,fp8,fp8,0,0.2332223892211914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,2,128,1,float16,float16,0,0.22607200145721434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,40,128,1,fp8,fp8,0,0.013419200479984284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,2,128,1,float16,fp8,0,0.23388319015502929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,2,128,1,fp8,fp8,0,0.23397600650787354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,4,128,1,float16,float16,0,0.23248798847198487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,4,128,1,float16,fp8,0,0.23784639835357665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,4,128,1,fp8,fp8,0,0.23872640132904052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,8,128,1,float16,float16,0,0.27646241188049314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,8,128,1,float16,fp8,0,0.2816943883895874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,40,8,128,1,fp8,fp8,0,0.28204638957977296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,40,128,1,float16,float16,0,0.2327728033065796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,40,8,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,40,8,128,1,float16,float16,0,0.041233599185943604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,40,128,1,float16,fp8,0,0.18830080032348634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,40,128,1,fp8,fp8,0,0.013337600231170654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,1,128,1,float16,fp8,0,0.12261120080947877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,1,128,1,fp8,fp8,0,0.1239359974861145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,2,128,1,float16,fp8,0,0.12448639869689941
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,2,128,1,fp8,fp8,0,0.124236798286438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,4,128,1,float16,float16,0,0.1249343991279602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,4,128,1,float16,fp8,0,0.12641919851303102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,40,4,128,1,float16,float16,0,0.011966399848461151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,8,128,1,float16,float16,0,0.14744319915771484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,8,128,1,float16,fp8,0,0.14782559871673584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,8,128,1,fp8,fp8,0,0.14764480590820311
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,40,128,1,float16,fp8,0,0.10037599802017212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,40,128,1,fp8,fp8,0,0.10047680139541626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,1,128,1,float16,float16,0,0.0662447988986969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,1,128,1,float16,fp8,0,0.0672815978527069
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,1,128,1,fp8,fp8,0,0.06676319837570191
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,2,128,1,float16,float16,0,0.06726400256156921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,2,128,1,float16,fp8,0,0.0676639974117279
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,2,128,1,fp8,fp8,0,0.0675279974937439
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,4,128,1,float16,float16,0,0.0691215991973877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,4,128,1,float16,fp8,0,0.06883040070533752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,4,128,1,fp8,fp8,0,0.06766560077667236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,8,128,1,float16,float16,0,0.08001279830932617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,8,128,1,float16,fp8,0,0.07862079739570618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,8,128,1,fp8,fp8,0,0.07851999998092651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,40,128,1,float16,float16,0,0.06802719831466675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,40,128,1,fp8,fp8,0,0.05221440196037293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,1,128,1,float16,float16,0,0.03420960009098053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,1,128,1,float16,fp8,0,0.033399999141693115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,1,128,1,fp8,fp8,0,0.03346239924430847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,2,128,1,float16,float16,0,0.03590880036354065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,2,128,1,float16,fp8,0,0.033611199259757994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,40,128,1,fp8,fp8,0,0.18932319879531861
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,2,128,1,fp8,fp8,0,0.03363839983940124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,1,128,1,float16,float16,0,0.11749119758605957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,4,128,1,float16,float16,0,0.03919999897480011
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,2,128,1,float16,float16,0,0.11974560022354126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,8,128,1,float16,float16,0,0.044519999623298646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,8,128,1,float16,fp8,0,0.04018400013446808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,40,128,1,float16,float16,0,0.03744960129261017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,40,4,128,1,fp8,fp8,0,0.12706559896469116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,40,128,1,float16,fp8,0,0.025785601139068602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,1,128,1,float16,float16,0,0.021646399796009064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,1,128,1,float16,fp8,0,0.02258719950914383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,40,40,128,1,float16,float16,0,0.12319200038909912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,1,128,1,fp8,fp8,0,0.022547200322151184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,2,128,1,float16,fp8,0,0.022652800381183624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,2,128,1,fp8,fp8,0,0.022703999280929567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,4,128,1,float16,float16,0,0.02229759991168976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,4,128,1,float16,fp8,0,0.022819200158119203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,4,128,1,fp8,fp8,0,0.02281759977340698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,8,128,1,float16,float16,0,0.022567999362945557
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,8,128,1,float16,fp8,0,0.02301599979400635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,8,128,1,fp8,fp8,0,0.0228752002120018
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,40,128,1,float16,float16,0,0.019166399538517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,40,128,1,float16,fp8,0,0.01688160002231598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,40,128,1,fp8,fp8,0,0.016971200704574585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,1,128,1,float16,float16,0,0.01701440066099167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,40,128,1,float16,fp8,0,0.05220639705657959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,1,128,1,fp8,fp8,0,0.017569600045681
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,2,128,1,float16,float16,0,0.016689600050449373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,2,128,1,float16,fp8,0,0.017603200674057008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,2,128,1,fp8,fp8,0,0.017371200025081635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,4,128,1,float16,float16,0,0.016864000260829924
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,4,128,1,float16,fp8,0,0.017505599558353423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,4,128,1,fp8,fp8,0,0.0174127995967865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,8,128,1,float16,fp8,0,0.01754080057144165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,8,128,1,fp8,fp8,0,0.017478400468826295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,40,128,1,float16,float16,0,0.014431999623775482
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,4,128,1,float16,fp8,0,0.03390879929065704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,40,128,1,float16,fp8,0,0.01372320055961609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,4,128,1,fp8,fp8,0,0.03375999927520752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,1,128,1,float16,float16,0,0.012147200107574464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,1,128,1,float16,fp8,0,0.01276639997959137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,40,8,128,1,fp8,fp8,0,0.039868798851966855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,2,128,1,float16,float16,0,0.012265600264072418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,2,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,2,128,1,fp8,fp8,0,0.012542399764060973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,4,128,1,float16,float16,0,0.01228799968957901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,4,128,1,float16,fp8,0,0.012539200484752655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,4,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,8,128,1,float16,float16,0,0.012305600196123123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,8,128,1,float16,fp8,0,0.012443199753761292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,8,128,1,fp8,fp8,0,0.01268800050020218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,2,128,1,float16,float16,0,0.021857599914073943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,40,128,1,float16,float16,0,0.011601600050926208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,40,128,1,fp8,fp8,0,0.01111999973654747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,1,128,1,float16,float16,0,0.012043199688196182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,1,128,1,float16,fp8,0,0.012417600303888322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,1,128,1,fp8,fp8,0,0.01228640004992485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,2,128,1,float16,float16,0,0.012361600250005721
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,2,128,1,float16,fp8,0,0.012216000258922577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,2,128,1,fp8,fp8,0,0.012230399996042252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,4,128,1,float16,float16,0,0.011910399794578553
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,4,128,1,float16,fp8,0,0.012408000230789185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,4,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,1,128,1,float16,fp8,0,0.017446400225162507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,8,128,1,float16,float16,0,0.012188799679279327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,8,128,1,float16,fp8,0,0.012372799962759019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,8,128,1,fp8,fp8,0,0.012470400333404541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,40,128,1,float16,float16,0,0.01096000000834465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,40,128,1,float16,fp8,0,0.010862399637699128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,40,128,1,fp8,fp8,0,0.010681600123643876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,1,128,1,float16,float16,0,0.01180960014462471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,1,128,1,float16,fp8,0,0.012064000219106674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,1,128,1,fp8,fp8,0,0.012017600238323212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,2,128,1,float16,float16,0,0.011948800086975098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,2,128,1,float16,fp8,0,0.01215839982032776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,40,8,128,1,float16,float16,0,0.017092800140380858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,2,128,1,fp8,fp8,0,0.012084800004959106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,4,128,1,float16,fp8,0,0.012166400253772736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,8,128,1,float16,float16,0,0.01183359995484352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,8,128,1,float16,fp8,0,0.012246400117874146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,40,128,1,fp8,fp8,0,0.013753600418567657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,8,128,1,fp8,fp8,0,0.012321600317955017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,40,1,128,1,fp8,fp8,0,0.012718400359153748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,1,128,1,float16,float16,0,0.11722719669342041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,1,128,1,float16,fp8,0,0.12443840503692627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,1,128,1,fp8,fp8,0,0.12421599626541138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,40,40,128,1,fp8,fp8,0,0.025243198871612547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,2,128,1,float16,float16,0,0.11987999677658082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,2,128,1,float16,fp8,0,0.12450560331344604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,4,128,1,float16,float16,0,0.138264000415802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,4,128,1,float16,fp8,0,0.14258079528808593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,4,128,1,fp8,fp8,0,0.14336800575256348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,40,40,128,1,float16,fp8,0,0.011475200206041336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,8,128,1,float16,float16,0,0.17096799612045288
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,8,128,1,float16,fp8,0,0.1762943983078003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,8,128,1,fp8,fp8,0,0.1777951955795288
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,1,128,1,float16,float16,0,0.06655679941177368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,40,128,1,float16,float16,0,0.17066559791564942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,40,128,1,float16,fp8,0,0.1448480010032654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,40,128,1,fp8,fp8,0,0.14516639709472656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,1,128,1,float16,fp8,0,0.06799200177192688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,1,128,1,fp8,fp8,0,0.06778240203857422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,2,128,1,float16,fp8,0,0.06793760061264038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,2,128,1,fp8,fp8,0,0.06857759952545166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,4,128,1,float16,float16,0,0.07680479884147644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,4,128,1,float16,fp8,0,0.07703999876976013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,4,128,1,fp8,fp8,0,0.07723199725151061
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,8,128,1,float16,float16,0,0.09272159934043885
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,8,128,1,float16,fp8,0,0.0953104019165039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,8,128,1,fp8,fp8,0,0.09556319713592529
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,40,128,1,float16,float16,0,0.0928816020488739
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,40,128,1,float16,fp8,0,0.07701119780540466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,40,128,1,fp8,fp8,0,0.07717599868774414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,1,128,1,float16,float16,0,0.034897598624229434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,1,128,1,float16,fp8,0,0.03355039954185486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,1,128,1,fp8,fp8,0,0.03354080021381378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,2,128,1,float16,float16,0,0.036590400338172915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,2,128,1,float16,fp8,0,0.033795198798179625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,2,128,1,fp8,fp8,0,0.03359520137310028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,4,128,1,float16,float16,0,0.04222719967365265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,4,128,1,float16,fp8,0,0.038945600390434265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,4,128,1,fp8,fp8,0,0.03807680010795593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,8,128,1,float16,float16,0,0.05218719840049744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,8,128,1,float16,fp8,0,0.048772799968719485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,40,8,128,1,fp8,fp8,0,0.049511998891830444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,40,128,1,float16,float16,0,0.049751999974250796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,4,128,1,float16,float16,0,0.01204800009727478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,1,128,1,float16,float16,0,0.0219200000166893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,40,4,128,1,fp8,fp8,0,0.012228800356388092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,1,128,1,fp8,fp8,0,0.02279199957847595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,1,128,1,float16,fp8,0,0.022987200319766997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,2,128,1,float16,float16,0,0.02207999974489212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,2,128,1,fp8,fp8,0,0.02276639938354492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,4,128,1,float16,float16,0,0.022148799896240235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,4,128,1,float16,fp8,0,0.022867199778556824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,4,128,1,fp8,fp8,0,0.02263839989900589
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,8,128,1,float16,float16,0,0.026675200462341307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,8,128,1,float16,fp8,0,0.027241599559783936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,40,128,1,float16,float16,0,0.02460319995880127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,40,128,1,float16,fp8,0,0.021960000693798064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,40,128,1,fp8,fp8,0,0.02210720032453537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,1,128,1,float16,fp8,0,0.017312000691890716
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,40,2,128,1,fp8,fp8,0,0.1251904010772705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,1,128,1,fp8,fp8,0,0.017342400550842286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,2,128,1,float16,float16,0,0.016857600212097167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,2,128,1,float16,fp8,0,0.0173552006483078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,2,128,1,fp8,fp8,0,0.017535999417304993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,4,128,1,float16,float16,0,0.016769599914550782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,4,128,1,float16,fp8,0,0.017497600615024568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,4,128,1,fp8,fp8,0,0.017550399899482726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,8,128,1,float16,float16,0,0.01730560064315796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,8,128,1,float16,fp8,0,0.017688000202178956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,8,128,1,fp8,fp8,0,0.017705599963665008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,40,128,1,float16,float16,0,0.017115199565887453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,40,2,128,1,float16,float16,0,0.06698240041732788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,40,128,1,float16,fp8,0,0.01618559956550598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,40,128,1,fp8,fp8,0,0.015990400314331056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,1,128,1,float16,float16,0,0.012508800625801087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,1,128,1,float16,fp8,0,0.012675200402736665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,1,128,1,fp8,fp8,0,0.01268640011548996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,2,128,1,float16,float16,0,0.012427199631929398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,2,128,1,float16,fp8,0,0.012697599828243256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,2,128,1,fp8,fp8,0,0.012615999579429627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,4,128,1,float16,float16,0,0.012572799623012543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,4,128,1,float16,fp8,0,0.012912000715732574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,8,128,1,float16,float16,0,0.012676799297332763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,8,128,1,float16,fp8,0,0.01295199990272522
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,8,128,1,fp8,fp8,0,0.012889599800109864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,40,128,1,float16,float16,0,0.01372160017490387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,40,128,1,float16,fp8,0,0.013257600367069244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,40,128,1,fp8,fp8,0,0.013329599797725678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,40,128,1,float16,fp8,0,0.03895359933376312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,40,128,1,fp8,fp8,0,0.03876639902591705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,1,128,1,fp8,fp8,0,0.012615999579429627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,2,128,1,float16,fp8,0,0.02282399982213974
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,2,128,1,float16,float16,0,0.012104000151157378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,2,128,1,float16,fp8,0,0.012398400157690049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,4,128,1,float16,float16,0,0.011977600306272507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,40,8,128,1,fp8,fp8,0,0.027131199836730957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,4,128,1,float16,fp8,0,0.012577599287033081
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,4,128,1,fp8,fp8,0,0.012401600182056428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,8,128,1,float16,float16,0,0.01199520006775856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,40,1,128,1,float16,float16,0,0.016630400717258454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,8,128,1,float16,fp8,0,0.012438400089740754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,8,128,1,fp8,fp8,0,0.012529599666595458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,40,128,1,float16,float16,0,0.011169599741697312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,40,128,1,fp8,fp8,0,0.01101439967751503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,40,128,1,float16,fp8,0,0.011052799969911575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,1,128,1,float16,float16,0,0.011952000111341477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,1,128,1,float16,fp8,0,0.01226079985499382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,1,128,1,fp8,fp8,0,0.012092799693346024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,2,128,1,float16,float16,0,0.01189119964838028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,2,128,1,float16,fp8,0,0.012110400199890136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,2,128,1,fp8,fp8,0,0.012331199645996094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,4,128,1,float16,float16,0,0.011795199662446975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,4,128,1,float16,fp8,0,0.012359999865293504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,4,128,1,fp8,fp8,0,0.012345600128173827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,8,128,1,float16,float16,0,0.011849600076675414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,8,128,1,fp8,fp8,0,0.01231520026922226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,40,128,1,float16,float16,0,0.010992000252008438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,40,128,1,float16,fp8,0,0.010755199939012527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,40,128,1,fp8,fp8,0,0.010871999710798264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,1,128,1,float16,float16,0,0.01170559972524643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,1,128,1,float16,fp8,0,0.012051200121641159
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,1,128,1,fp8,fp8,0,0.011769600212574005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,2,128,1,float16,fp8,0,0.011774399876594543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,2,128,1,fp8,fp8,0,0.011907199770212174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,4,128,1,float16,float16,0,0.011675199866294861
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,4,128,1,float16,fp8,0,0.011932799965143204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,4,128,1,fp8,fp8,0,0.011841599643230439
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,8,128,1,float16,float16,0,0.011572799831628799
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,8,128,1,float16,fp8,0,0.011819200217723846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,40,4,128,1,fp8,fp8,0,0.012887999415397644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,1,128,1,float16,float16,0,0.06724640130996704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,1,128,1,float16,fp8,0,0.06885920166969299
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,1,128,1,fp8,fp8,0,0.06926239728927612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,2,128,1,float16,float16,0,0.07525920271873474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,2,128,1,float16,fp8,0,0.07757279872894288
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,2,128,1,fp8,fp8,0,0.07750399708747864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,1,128,1,float16,float16,0,0.011793600022792816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,4,128,1,float16,float16,0,0.09046239852905273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,4,128,1,float16,fp8,0,0.09511680006980897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,4,128,1,fp8,fp8,0,0.09472640156745911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,8,128,1,float16,float16,0,0.09845119714736938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,2,128,1,fp8,fp8,0,0.012409599870443344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,8,128,1,fp8,fp8,0,0.09846559762954712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,40,128,1,float16,float16,0,0.1448431968688965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,40,128,1,float16,fp8,0,0.12738879919052123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,1,128,1,float16,float16,0,0.03583360016345978
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,40,128,1,fp8,fp8,0,0.12814559936523437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,1,128,1,fp8,fp8,0,0.03421280086040497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,2,128,1,float16,float16,0,0.040452799201011656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,2,128,1,float16,fp8,0,0.038308799266815186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,2,128,1,fp8,fp8,0,0.03877919912338257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,4,128,1,float16,float16,0,0.050398397445678714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,4,128,1,float16,fp8,0,0.04758880138397217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,4,128,1,fp8,fp8,0,0.04744960069656372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,8,128,1,float16,float16,0,0.05380480289459229
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,8,128,1,float16,fp8,0,0.04992319941520691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,8,128,1,fp8,fp8,0,0.05091519951820374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,40,128,1,float16,float16,0,0.07836959958076477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,40,128,1,float16,fp8,0,0.06385599970817565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,2,128,1,float16,float16,0,0.011832000315189361
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,1,128,1,float16,float16,0,0.02205280065536499
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,1,128,1,float16,fp8,0,0.02317280024290085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,1,128,1,fp8,fp8,0,0.023235200345516203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,2,128,1,float16,fp8,0,0.023174400627613067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,40,8,128,1,fp8,fp8,0,0.011819200217723846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,2,128,1,fp8,fp8,0,0.02295999974012375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,4,128,1,float16,float16,0,0.02598400115966797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,4,128,1,float16,fp8,0,0.027483201026916503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,4,128,1,fp8,fp8,0,0.027449598908424376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,8,128,1,float16,float16,0,0.026212799549102783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,8,128,1,float16,fp8,0,0.027617600560188294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,8,128,1,fp8,fp8,0,0.027620801329612733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,40,128,1,float16,float16,0,0.03723039925098419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,40,1,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,40,128,1,float16,fp8,0,0.035227200388908385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,40,8,128,1,float16,fp8,0,0.0986352026462555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,40,128,1,fp8,fp8,0,0.03531039953231811
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,1,128,1,fp8,fp8,0,0.017612800002098083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,2,128,1,float16,float16,0,0.016784000396728515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,40,1,128,1,float16,fp8,0,0.03469600081443787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,2,128,1,float16,fp8,0,0.017828799784183502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,2,128,1,fp8,fp8,0,0.01786399930715561
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,4,128,1,float16,float16,0,0.017203199863433837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,4,128,1,float16,fp8,0,0.017888000607490538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,8,128,1,float16,float16,0,0.01728159934282303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,8,128,1,float16,fp8,0,0.017849600315093993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,40,8,128,1,float16,fp8,0,0.01228640004992485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,8,128,1,fp8,fp8,0,0.017884799838066102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,40,128,1,float16,float16,0,0.022526399791240694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,40,128,1,float16,fp8,0,0.02165919989347458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,40,128,1,fp8,fp8,0,0.021495999395847322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,1,128,1,float16,float16,0,0.01268640011548996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,1,128,1,float16,fp8,0,0.012467200309038163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,1,128,1,fp8,fp8,0,0.012734399735927581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,2,128,1,float16,float16,0,0.012280000001192093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,2,128,1,float16,fp8,0,0.012750400602817536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,2,128,1,fp8,fp8,0,0.012587200105190276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,4,128,1,float16,float16,0,0.012375999987125397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,2,128,1,float16,float16,0,0.02197439968585968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,4,128,1,fp8,fp8,0,0.01276959925889969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,8,128,1,float16,float16,0,0.012100800126791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,8,128,1,fp8,fp8,0,0.01249760016798973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,40,128,1,float16,float16,0,0.016204799711704253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,40,128,1,float16,fp8,0,0.015721599757671356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,40,128,1,fp8,fp8,0,0.01581120043992996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,1,128,1,float16,float16,0,0.01234399974346161
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,1,128,1,float16,fp8,0,0.012628799676895142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,1,128,1,fp8,fp8,0,0.012582400441169738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,2,128,1,float16,float16,0,0.012062399834394454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,2,128,1,float16,fp8,0,0.012608000636100769
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,2,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,4,128,1,float16,float16,0,0.012137600034475327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,4,128,1,float16,fp8,0,0.012590399384498597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,1,128,1,float16,fp8,0,0.017815999686717987
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,4,128,1,fp8,fp8,0,0.012606400251388549
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,8,128,1,float16,float16,0,0.011979199945926666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,8,128,1,fp8,fp8,0,0.012292800098657608
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,4,128,1,fp8,fp8,0,0.017990399897098542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,40,128,1,float16,float16,0,0.013380800187587739
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,40,128,1,float16,fp8,0,0.013187199831008911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,40,128,1,fp8,fp8,0,0.013307200372219085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,1,128,1,float16,float16,0,0.011772800236940384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,1,128,1,float16,fp8,0,0.012174399942159653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,1,128,1,fp8,fp8,0,0.011937599629163742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,2,128,1,float16,fp8,0,0.01226240023970604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,2,128,1,fp8,fp8,0,0.012107200175523757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,4,128,1,float16,float16,0,0.011847999691963196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,40,40,128,1,fp8,fp8,0,0.06322559714317322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,4,128,1,float16,fp8,0,0.012342400103807449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,8,128,1,float16,float16,0,0.01156959980726242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,4,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,8,128,1,float16,fp8,0,0.012216000258922577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,40,128,1,float16,float16,0,0.010942400246858597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,40,8,128,1,float16,fp8,0,0.012788799405097962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,40,128,1,float16,fp8,0,0.010716799646615982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,1,128,1,float16,float16,0,0.011593600362539291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,40,128,1,fp8,fp8,0,0.011059200018644333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,1,128,1,float16,fp8,0,0.012067200243473053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,2,128,1,float16,float16,0,0.011713600158691407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,2,128,1,float16,fp8,0,0.012132800370454788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,2,128,1,fp8,fp8,0,0.011932799965143204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,4,128,1,float16,float16,0,0.011720000207424164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,4,128,1,float16,fp8,0,0.011972799897193909
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,4,128,1,fp8,fp8,0,0.012062399834394454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,8,128,1,float16,float16,0,0.011454399675130844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,8,128,1,float16,fp8,0,0.01191679984331131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,8,128,1,fp8,fp8,0,0.011648000031709672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,40,128,1,float16,float16,0,0.010817600041627884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,40,128,1,float16,fp8,0,0.010599999874830245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,40,128,1,fp8,fp8,0,0.010582400113344192
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,1,128,1,float16,float16,0,0.011460799723863602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,1,128,1,float16,fp8,0,0.01170559972524643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,1,128,1,fp8,fp8,0,0.01175839975476265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,2,128,1,float16,float16,0,0.011481600254774094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,2,128,1,float16,fp8,0,0.011699199676513672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,2,128,1,fp8,fp8,0,0.011847999691963196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,4,128,1,float16,float16,0,0.01154400035738945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,4,128,1,fp8,fp8,0,0.01188800036907196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,40,1,128,1,float16,float16,0,0.016993600130081176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,8,128,1,float16,float16,0,0.011360000073909759
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,8,128,1,fp8,fp8,0,0.01156959980726242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,8,128,1,float16,fp8,0,0.011668799817562104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,1,128,1,float16,float16,0,0.01815840005874634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,40,8,128,1,float16,fp8,0,0.012430399656295776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,1,128,1,fp8,fp8,0,0.018137599527835845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,1,128,1,float16,fp8,0,0.017987200617790224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,2,128,1,float16,float16,0,0.023819200694561005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,2,128,1,float16,fp8,0,0.02343519926071167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,2,128,1,fp8,fp8,0,0.023576000332832338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,4,128,1,float16,float16,0,0.0348800003528595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,4,128,1,float16,fp8,0,0.034590399265289305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,4,128,1,fp8,fp8,0,0.03468799889087677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,8,128,1,float16,float16,0,0.05612480044364929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,2,128,1,float16,float16,0,0.011774399876594543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,8,128,1,float16,fp8,0,0.056092798709869385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,40,128,1,float16,float16,0,0.10808320045471191
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,4,128,1,fp8,fp8,0,0.012542399764060973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,1,128,1,float16,float16,0,0.01321759968996048
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,40,8,128,1,fp8,fp8,0,0.012118399888277055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,1,128,1,float16,fp8,0,0.012889599800109864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,1,128,1,fp8,fp8,0,0.012868799269199371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,2,128,1,float16,float16,0,0.01555359959602356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,2,128,1,float16,fp8,0,0.015488000214099884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,2,128,1,fp8,fp8,0,0.015494400262832641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,40,1,128,1,fp8,fp8,0,0.011767999827861786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,4,128,1,float16,fp8,0,0.021169599890708924
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,4,128,1,fp8,fp8,0,0.020788800716400147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,8,128,1,float16,float16,0,0.032652801275253295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,8,128,1,float16,fp8,0,0.0316895991563797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,8,128,1,fp8,fp8,0,0.03215999901294708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,40,128,1,float16,float16,0,0.059406399726867676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,40,128,1,fp8,fp8,0,0.05911039710044861
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,1,128,1,float16,float16,0,0.011158400028944016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,1,128,1,float16,fp8,0,0.011132799834012986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,1,128,1,fp8,fp8,0,0.011283200234174728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,40,4,128,1,float16,fp8,0,0.011980800330638886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,2,128,1,float16,float16,0,0.011380799859762192
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,2,128,1,float16,fp8,0,0.01133119985461235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,4,128,1,float16,float16,0,0.014046399295330048
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,4,128,1,float16,fp8,0,0.013862399756908417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,4,128,1,fp8,fp8,0,0.01395999938249588
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,8,128,1,float16,float16,0,0.01977120041847229
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,8,128,1,float16,fp8,0,0.019676800072193145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,8,128,1,fp8,fp8,0,0.019595199823379518
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,40,128,1,float16,float16,0,0.03451200127601624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,40,128,1,float16,fp8,0,0.03432320058345795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,40,128,1,fp8,fp8,0,0.0343392014503479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,40,8,128,1,fp8,fp8,0,0.05615360140800476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,1,128,1,float16,float16,0,0.010790400207042694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,40,128,1,float16,fp8,0,0.10710719823837281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,1,128,1,fp8,fp8,0,0.010252799838781357
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,2,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,40,128,1,fp8,fp8,0,0.1062816023826599
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,2,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,4,128,1,float16,float16,0,0.010686399787664414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,4,128,1,fp8,fp8,0,0.010633599758148194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,8,128,1,float16,float16,0,0.013124799728393555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,8,128,1,float16,fp8,0,0.013097600638866424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,8,128,1,fp8,fp8,0,0.013278399407863618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,40,4,128,1,float16,float16,0,0.021334399282932282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,40,128,1,float16,fp8,0,0.020636799931526183
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,1,128,1,float16,float16,0,0.009779199957847595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,40,128,1,fp8,fp8,0,0.020919999480247496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,1,128,1,float16,fp8,0,0.009675200283527374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,1,128,1,fp8,fp8,0,0.009719999879598618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,2,128,1,float16,float16,0,0.009907200187444686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,2,128,1,float16,fp8,0,0.009876800328493118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,40,128,1,float16,fp8,0,0.05883839726448059
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,4,128,1,float16,float16,0,0.010171200335025787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,4,128,1,float16,fp8,0,0.010209599882364273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,8,128,1,float16,float16,0,0.010305599868297577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,8,128,1,float16,fp8,0,0.010276799649000167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,8,128,1,fp8,fp8,0,0.010222399979829789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,40,128,1,float16,float16,0,0.015836800634860992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,40,2,128,1,fp8,fp8,0,0.011155200004577637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,40,128,1,fp8,fp8,0,0.015411199629306793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,1,128,1,float16,float16,0,0.009796799719333648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,1,128,1,float16,fp8,0,0.009799999743700027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,1,128,1,fp8,fp8,0,0.009777600318193436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,2,128,1,float16,float16,0,0.010158400237560272
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,2,128,1,float16,fp8,0,0.009908799827098847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,2,128,1,fp8,fp8,0,0.00989760011434555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,4,128,1,float16,float16,0,0.01003199964761734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,4,128,1,float16,fp8,0,0.009907200187444686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,4,128,1,fp8,fp8,0,0.010044799745082855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,8,128,1,float16,float16,0,0.0102463997900486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,8,128,1,float16,fp8,0,0.01008479967713356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,8,128,1,fp8,fp8,0,0.010102400183677673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,40,128,1,float16,float16,0,0.013339200615882873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,2,128,1,float16,float16,0,0.010564800351858139
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,40,128,1,float16,fp8,0,0.013059200346469879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,40,128,1,fp8,fp8,0,0.01289760023355484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,4,128,1,float16,fp8,0,0.010660800337791442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,1,128,1,fp8,fp8,0,0.009444800019264222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,2,128,1,float16,float16,0,0.009719999879598618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,2,128,1,float16,fp8,0,0.009484799951314926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,2,128,1,fp8,fp8,0,0.009460800141096116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,4,128,1,float16,float16,0,0.009759999811649323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,4,128,1,float16,fp8,0,0.009544000029563904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,4,128,1,fp8,fp8,0,0.00952640026807785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,8,128,1,float16,float16,0,0.009743999689817429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,2,128,1,fp8,fp8,0,0.009891200065612792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,8,128,1,float16,fp8,0,0.009696000069379807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,8,128,1,fp8,fp8,0,0.009884800016880035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,4,128,1,fp8,fp8,0,0.010134399682283402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,40,128,1,float16,fp8,0,0.010678400099277497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,40,128,1,fp8,fp8,0,0.010659199953079224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,1,128,1,float16,float16,0,0.00958240032196045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,1,128,1,float16,fp8,0,0.00931520015001297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,1,128,1,fp8,fp8,0,0.009273599833250046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,2,128,1,float16,float16,0,0.00963359996676445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,2,128,1,float16,fp8,0,0.009364800155162811
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,2,128,1,fp8,fp8,0,0.009335999935865402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,4,128,1,float16,float16,0,0.009700799733400345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,4,128,1,float16,fp8,0,0.00957759991288185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,4,128,1,fp8,fp8,0,0.009657599776983262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,8,128,1,float16,float16,0,0.0097680002450943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,8,128,1,float16,fp8,0,0.009692800045013428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,8,128,1,fp8,fp8,0,0.009681600332260131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,40,128,1,float16,float16,0,0.010755199939012527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,40,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,40,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,1,128,1,float16,float16,0,0.00952960029244423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,1,128,1,float16,fp8,0,0.009275200217962265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,1,128,1,fp8,fp8,0,0.009268800169229508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,2,128,1,float16,float16,0,0.009491200000047684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,1,128,1,float16,float16,0,0.009371200203895569
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,2,128,1,float16,fp8,0,0.009358400106430053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,40,1,128,1,float16,fp8,0,0.009257599711418152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,2,128,1,fp8,fp8,0,0.009424000233411788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,4,128,1,float16,float16,0,0.009406399726867676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,4,128,1,float16,fp8,0,0.00917920023202896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,4,128,1,fp8,fp8,0,0.00931679978966713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,8,128,1,float16,float16,0,0.009364800155162811
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,8,128,1,float16,fp8,0,0.009275200217962265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,40,40,128,1,float16,float16,0,0.021396799385547637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,40,40,128,1,float16,float16,0,0.010923200100660325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,40,40,128,1,float16,fp8,0,0.015388800203800202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,40,1,128,1,float16,fp8,0,0.01056160032749176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,40,8,128,1,fp8,fp8,0,0.009716799855232239
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,1,128,1,float16,fp8,0,23.982652282714845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,1,128,1,fp8,fp8,0,23.957664489746094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,2,128,1,float16,fp8,0,23.502386474609374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,2,128,1,fp8,fp8,0,24.027557373046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,4,128,1,float16,fp8,0,23.818048095703126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,1,128,1,float16,float16,0,30.452072143554688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,2,128,1,float16,float16,0,29.831326293945313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,4,128,1,float16,float16,0,30.35051574707031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,32,128,1,float16,float16,0,16.646315002441405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,4,128,1,fp8,fp8,0,25.080946350097655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,1,128,1,float16,float16,0,15.832095336914062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,8,128,1,float16,fp8,0,24.770292663574217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,8,128,1,fp8,fp8,0,25.404644775390626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,32,8,128,1,float16,float16,0,32.061328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,1,128,1,float16,fp8,0,11.904500579833984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,1,128,1,fp8,fp8,0,12.080397033691407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,2,128,1,float16,fp8,0,11.755083465576172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,2,128,1,fp8,fp8,0,12.05228500366211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,2,128,1,float16,float16,0,15.10758056640625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,4,128,1,float16,fp8,0,11.976972961425782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,4,128,1,float16,float16,0,15.614013671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,4,128,1,fp8,fp8,0,12.405292510986328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,8,128,1,float16,fp8,0,12.378179168701172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,8,128,1,fp8,fp8,0,12.104135894775391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,32,128,1,float16,float16,0,7.992116546630859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,8,128,1,float16,float16,0,15.574957275390625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,1,128,1,float16,fp8,0,5.978340911865234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,1,128,1,fp8,fp8,0,6.231631851196289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,1,128,1,float16,float16,0,8.170428466796874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,2,128,1,float16,float16,0,7.6073661804199215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,2,128,1,float16,fp8,0,5.955129623413086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,2,128,1,fp8,fp8,0,5.947592163085938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,4,128,1,float16,fp8,0,5.955193710327149
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,4,128,1,float16,float16,0,8.010797119140625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,4,128,1,fp8,fp8,0,5.943673706054687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,8,128,1,float16,float16,0,7.8546897888183596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,8,128,1,float16,fp8,0,5.919659042358399
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,8,128,1,fp8,fp8,0,6.0715984344482425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,32,128,1,float16,float16,0,3.9292110443115233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,1,128,1,float16,fp8,0,3.0733728408813477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,1,128,1,float16,float16,0,3.740591812133789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,1,128,1,fp8,fp8,0,2.9247312545776367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,2,128,1,float16,float16,0,3.796712112426758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,2,128,1,float16,fp8,0,2.9385263442993166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,2,128,1,fp8,fp8,0,3.12476806640625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,4,128,1,float16,float16,0,3.806097412109375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,4,128,1,float16,fp8,0,2.999092864990234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,4,128,1,fp8,fp8,0,3.1127519607543945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,8,128,1,float16,float16,0,3.892171096801758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,8,128,1,float16,fp8,0,2.978767967224121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,8,128,1,fp8,fp8,0,3.2366592407226564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,1,128,1,float16,fp8,0,14.083660888671876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,1,128,1,float16,float16,0,17.322407531738282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,1,128,1,fp8,fp8,0,13.878050231933594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,2,128,1,float16,float16,0,17.42760467529297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,2,128,1,float16,fp8,0,14.108419799804688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,2,128,1,fp8,fp8,0,13.498843383789062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,4,128,1,float16,fp8,0,13.756019592285156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,4,128,1,float16,float16,0,17.935810852050782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,4,128,1,fp8,fp8,0,13.84691925048828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,8,128,1,float16,float16,0,18.235047912597658
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,32,128,1,float16,fp8,0,2.951363182067871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,8,128,1,float16,fp8,0,13.648915100097657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,32,32,128,1,fp8,fp8,0,2.941873550415039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,32,128,1,float16,fp8,0,6.116918563842773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,32,32,128,1,fp8,fp8,0,6.138103866577149
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,32,8,128,1,fp8,fp8,0,13.9605712890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,32,128,1,float16,fp8,0,12.360230255126954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,32,128,1,float16,float16,0,9.044407653808594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,32,128,1,float16,fp8,0,7.0752098083496096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,32,32,128,1,fp8,fp8,0,12.172090911865235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,32,128,1,fp8,fp8,0,7.1043548583984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,1,128,1,float16,fp8,0,6.857422637939453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,1,128,1,float16,float16,0,8.571939086914062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,1,128,1,fp8,fp8,0,6.733888244628906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,2,128,1,float16,fp8,0,6.798271942138672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,2,128,1,float16,float16,0,8.548372650146485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,2,128,1,fp8,fp8,0,6.670136260986328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,4,128,1,float16,float16,0,8.620500946044922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,4,128,1,fp8,fp8,0,6.967353820800781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,32,128,1,float16,fp8,0,3.453201675415039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,32,128,1,float16,float16,0,4.518844985961914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,8,128,1,float16,fp8,0,6.788919830322266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,8,128,1,fp8,fp8,0,6.9588157653808596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,8,128,1,float16,float16,0,8.722856140136718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,32,4,128,1,float16,fp8,0,6.646086120605469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,32,128,1,fp8,fp8,0,3.5881919860839844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,1,128,1,float16,float16,0,4.403692626953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,1,128,1,float16,fp8,0,3.327356719970703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,1,128,1,fp8,fp8,0,3.4528350830078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,2,128,1,float16,fp8,0,3.410923385620117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,2,128,1,fp8,fp8,0,3.5318111419677733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,4,128,1,float16,float16,0,4.097504043579102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,4,128,1,fp8,fp8,0,3.64561767578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,8,128,1,float16,float16,0,4.263630294799805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,32,128,1,float16,float16,0,2.188528060913086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,32,128,1,float16,fp8,0,1.8494047164916991
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,8,128,1,fp8,fp8,0,3.381569671630859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,2,128,1,float16,float16,0,4.165803146362305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,4,128,1,float16,fp8,0,3.392396926879883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,1,128,1,float16,float16,0,2.0705631256103514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,1,128,1,float16,fp8,0,1.852694320678711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,2,128,1,float16,fp8,0,1.6494735717773437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,2,128,1,float16,float16,0,1.9878639221191405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,2,128,1,fp8,fp8,0,1.6388751983642578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,32,8,128,1,float16,fp8,0,3.390675354003906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,4,128,1,float16,fp8,0,1.7121311187744142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,4,128,1,float16,float16,0,2.2797519683837892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,8,128,1,float16,float16,0,2.09607048034668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,8,128,1,float16,fp8,0,1.6765567779541015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,32,128,1,fp8,fp8,0,1.7802255630493165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,8,128,1,fp8,fp8,0,1.658950424194336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,1,128,1,fp8,fp8,0,1.6800111770629882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,32,4,128,1,fp8,fp8,0,1.6495599746704102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,1,128,1,float16,fp8,0,9.477454376220702
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,1,128,1,fp8,fp8,0,9.493135833740235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,2,128,1,float16,fp8,0,9.595101165771485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,2,128,1,fp8,fp8,0,9.517070770263672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,1,128,1,float16,float16,0,12.422257232666016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,2,128,1,float16,float16,0,12.202435302734376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,4,128,1,float16,fp8,0,9.45379180908203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,4,128,1,float16,float16,0,11.987846374511719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,32,128,1,float16,fp8,0,5.127481460571289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,32,128,1,fp8,fp8,0,4.955028915405274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,4,128,1,fp8,fp8,0,9.410809326171876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,8,128,1,float16,fp8,0,9.741582489013672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,8,128,1,fp8,fp8,0,9.590993499755859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,32,128,1,float16,float16,0,6.440364837646484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,32,8,128,1,float16,float16,0,12.578318023681641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,1,128,1,float16,float16,0,5.720388793945313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,1,128,1,float16,fp8,0,4.732902526855469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,1,128,1,fp8,fp8,0,4.712310409545898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,2,128,1,float16,fp8,0,4.8006847381591795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,2,128,1,fp8,fp8,0,4.738600158691407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,2,128,1,float16,float16,0,6.118588638305664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,4,128,1,fp8,fp8,0,4.790769577026367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,4,128,1,float16,fp8,0,4.892884826660156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,8,128,1,float16,fp8,0,4.778756713867187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,4,128,1,float16,float16,0,5.953263854980468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,32,128,1,float16,float16,0,3.104841613769531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,8,128,1,float16,float16,0,6.212332916259766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,32,8,128,1,fp8,fp8,0,4.85022087097168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,32,128,1,fp8,fp8,0,2.465598487854004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,1,128,1,float16,fp8,0,2.3339439392089845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,1,128,1,float16,float16,0,2.893217658996582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,1,128,1,fp8,fp8,0,2.480979156494141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,2,128,1,float16,float16,0,2.9884544372558595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,2,128,1,float16,fp8,0,2.7418880462646484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,32,128,1,float16,fp8,0,2.573284721374512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,2,128,1,fp8,fp8,0,2.407164764404297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,4,128,1,float16,fp8,0,2.3480159759521486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,4,128,1,float16,float16,0,2.716452789306641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,4,128,1,fp8,fp8,0,2.507694435119629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,32,128,1,float16,float16,0,1.4778287887573243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,8,128,1,float16,float16,0,2.9693599700927735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,32,128,1,float16,fp8,0,1.4009200096130372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,8,128,1,float16,fp8,0,2.678280067443848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,32,128,1,fp8,fp8,0,1.2527199745178224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,32,8,128,1,fp8,fp8,0,2.4473503112792967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,1,128,1,float16,fp8,0,1.1768239974975585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,1,128,1,fp8,fp8,0,1.2236720085144044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,2,128,1,float16,float16,0,1.4815823554992675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,2,128,1,float16,fp8,0,1.3275471687316895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,2,128,1,fp8,fp8,0,1.2475664138793945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,4,128,1,float16,float16,0,1.3989935874938966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,4,128,1,fp8,fp8,0,1.172009563446045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,8,128,1,float16,float16,0,1.3745887756347657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,8,128,1,float16,fp8,0,1.3263104438781739
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,1,128,1,float16,float16,0,1.4869808197021483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,4,128,1,float16,fp8,0,1.1839407920837401
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,32,8,128,1,fp8,fp8,0,1.272275161743164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,1,128,1,float16,fp8,0,12.305423736572266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,1,128,1,fp8,fp8,0,12.243270111083984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,2,128,1,float16,fp8,0,12.124827575683593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,1,128,1,float16,float16,0,16.06604461669922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,2,128,1,float16,float16,0,15.838861083984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,2,128,1,fp8,fp8,0,12.535769653320312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,4,128,1,float16,float16,0,16.069149780273438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,32,128,1,float16,fp8,0,6.529080200195312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,4,128,1,fp8,fp8,0,12.4960205078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,4,128,1,float16,fp8,0,12.406390380859374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,32,128,1,float16,float16,0,8.8408447265625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,8,128,1,float16,fp8,0,12.302098846435547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,32,128,1,fp8,fp8,0,6.598294067382812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,8,128,1,fp8,fp8,0,12.540953826904296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,32,8,128,1,float16,float16,0,15.830970764160156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,1,128,1,float16,float16,0,7.818267059326172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,1,128,1,fp8,fp8,0,6.118719863891601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,1,128,1,float16,fp8,0,6.089795303344727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,2,128,1,float16,float16,0,7.932548522949219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,2,128,1,float16,fp8,0,6.052788925170899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,2,128,1,fp8,fp8,0,6.2105152130126955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,4,128,1,float16,fp8,0,6.156516647338867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,4,128,1,float16,float16,0,7.954844665527344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,4,128,1,fp8,fp8,0,6.2554176330566404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,8,128,1,float16,fp8,0,6.183932876586914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,32,128,1,float16,fp8,0,3.2187232971191406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,32,128,1,float16,float16,0,3.9650062561035155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,8,128,1,float16,float16,0,7.937430572509766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,32,128,1,fp8,fp8,0,3.184017562866211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,1,128,1,float16,float16,0,3.90032958984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,1,128,1,float16,fp8,0,3.2026256561279296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,1,128,1,fp8,fp8,0,3.038256072998047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,32,8,128,1,fp8,fp8,0,6.223270416259766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,2,128,1,float16,fp8,0,3.077409553527832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,2,128,1,fp8,fp8,0,3.205936050415039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,2,128,1,float16,float16,0,3.8732494354248046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,4,128,1,float16,float16,0,3.7937889099121094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,32,128,1,float16,float16,0,1.9285951614379884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,4,128,1,float16,fp8,0,3.149465560913086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,8,128,1,float16,fp8,0,3.0993200302124024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,32,128,1,fp8,fp8,0,1.7534048080444335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,8,128,1,float16,float16,0,3.832388687133789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,8,128,1,fp8,fp8,0,3.0336559295654295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,1,128,1,float16,fp8,0,1.5652496337890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,1,128,1,float16,float16,0,2.0639312744140623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,1,128,1,fp8,fp8,0,1.4984288215637207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,2,128,1,fp8,fp8,0,1.5137439727783204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,2,128,1,float16,fp8,0,1.5744159698486329
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,2,128,1,float16,float16,0,1.7663856506347657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,4,128,1,float16,float16,0,1.8336063385009767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,32,4,128,1,fp8,fp8,0,3.043408012390137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,32,128,1,float16,fp8,0,1.7271648406982423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,4,128,1,float16,fp8,0,1.72283992767334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,4,128,1,fp8,fp8,0,1.506492805480957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,8,128,1,float16,fp8,0,1.5287535667419434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,8,128,1,float16,float16,0,1.7664880752563477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,32,128,1,fp8,fp8,0,0.7890511989593506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,32,128,1,float16,float16,0,0.9817024230957031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,1,128,1,float16,float16,0,0.89028959274292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,1,128,1,float16,fp8,0,0.775435209274292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,2,128,1,fp8,fp8,0,0.7879183769226075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,2,128,1,float16,fp8,0,0.7509712219238281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,4,128,1,float16,float16,0,0.8805328369140625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,4,128,1,float16,fp8,0,0.7540080070495605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,2,128,1,float16,float16,0,0.9099856376647949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,1,128,1,fp8,fp8,0,0.7617919921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,4,128,1,fp8,fp8,0,0.768990421295166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,8,128,1,float16,float16,0,0.8978896141052246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,8,128,1,float16,fp8,0,0.7854000091552734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,8,128,1,fp8,fp8,0,0.75797438621521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,32,32,128,1,float16,fp8,0,0.7938096046447753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,32,8,128,1,fp8,fp8,0,1.6766815185546875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,1,128,1,float16,fp8,0,7.060710144042969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,1,128,1,fp8,fp8,0,7.070302581787109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,2,128,1,float16,fp8,0,7.100179290771484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,2,128,1,fp8,fp8,0,7.158731079101562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,1,128,1,float16,float16,0,8.76043701171875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,2,128,1,float16,float16,0,8.962078094482422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,4,128,1,float16,float16,0,8.960736083984376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,4,128,1,float16,fp8,0,7.040452575683593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,32,128,1,float16,float16,0,4.786735916137696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,4,128,1,fp8,fp8,0,7.084381103515625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,32,128,1,float16,fp8,0,3.8362369537353516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,8,128,1,float16,fp8,0,7.179961395263672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,32,128,1,fp8,fp8,0,3.8571327209472654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,8,128,1,fp8,fp8,0,7.31759033203125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,32,8,128,1,float16,float16,0,9.235865783691406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,1,128,1,float16,float16,0,4.3109489440917965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,1,128,1,fp8,fp8,0,3.486307144165039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,1,128,1,float16,fp8,0,3.5678127288818358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,2,128,1,float16,fp8,0,3.5232608795166014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,2,128,1,fp8,fp8,0,3.5408206939697267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,2,128,1,float16,float16,0,4.467723083496094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,4,128,1,float16,fp8,0,3.6234977722167967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,4,128,1,fp8,fp8,0,3.5670703887939452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,32,128,1,float16,float16,0,2.359668731689453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,32,128,1,float16,fp8,0,1.875422477722168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,8,128,1,float16,fp8,0,3.5209808349609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,8,128,1,fp8,fp8,0,3.553732681274414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,32,128,1,fp8,fp8,0,1.8554880142211914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,8,128,1,float16,float16,0,4.472894287109375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,32,4,128,1,float16,float16,0,4.507223892211914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,1,128,1,float16,fp8,0,1.744900894165039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,1,128,1,fp8,fp8,0,1.7452335357666016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,2,128,1,float16,float16,0,2.029318428039551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,1,128,1,float16,float16,0,2.0764272689819334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,2,128,1,fp8,fp8,0,1.889967918395996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,4,128,1,float16,float16,0,2.099567985534668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,4,128,1,float16,fp8,0,1.7311920166015624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,4,128,1,fp8,fp8,0,1.7427343368530273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,8,128,1,float16,fp8,0,2.022632026672363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,8,128,1,fp8,fp8,0,1.7828880310058595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,8,128,1,float16,float16,0,2.2441312789916994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,32,128,1,float16,float16,0,1.2352928161621093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,32,128,1,fp8,fp8,0,0.9326128005981446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,1,128,1,float16,fp8,0,0.9092576026916503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,1,128,1,fp8,fp8,0,0.8906064033508301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,32,2,128,1,float16,fp8,0,1.7459903717041017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,2,128,1,float16,fp8,0,0.9659872055053711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,2,128,1,float16,float16,0,1.1307920455932616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,2,128,1,fp8,fp8,0,0.8742192268371582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,4,128,1,float16,fp8,0,0.8823712348937989
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,4,128,1,float16,float16,0,1.028940773010254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,4,128,1,fp8,fp8,0,0.8800064086914062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,8,128,1,float16,float16,0,1.0706543922424316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,32,128,1,float16,fp8,0,0.4931183815002441
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,32,128,1,float16,float16,0,0.5759407997131347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,32,128,1,fp8,fp8,0,0.4751391887664795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,8,128,1,fp8,fp8,0,0.8811087608337402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,1,128,1,float16,float16,0,0.5456816196441651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,1,128,1,float16,fp8,0,0.48812479972839357
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,1,128,1,fp8,fp8,0,0.44193120002746583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,2,128,1,float16,fp8,0,0.44298558235168456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,2,128,1,fp8,fp8,0,0.49453282356262207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,4,128,1,float16,fp8,0,0.4445519924163818
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,4,128,1,float16,float16,0,0.5248784065246582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,4,128,1,fp8,fp8,0,0.44486079216003416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,8,128,1,float16,float16,0,0.5537295818328858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,8,128,1,float16,fp8,0,0.4461775779724121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,8,128,1,fp8,fp8,0,0.4614687919616699
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,1,128,1,float16,float16,0,1.009727954864502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,8,128,1,float16,fp8,0,0.9939632415771484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,32,2,128,1,float16,float16,0,0.5080463886260986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,32,32,128,1,float16,fp8,0,0.9341327667236328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,1,128,1,float16,fp8,0,6.656603240966797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,1,128,1,fp8,fp8,0,6.516880035400391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,2,128,1,float16,fp8,0,6.64176025390625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,1,128,1,float16,float16,0,8.189220428466797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,2,128,1,fp8,fp8,0,6.572227478027344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,2,128,1,float16,float16,0,8.500176239013673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,4,128,1,float16,fp8,0,6.605499267578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,4,128,1,float16,float16,0,8.262563323974609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,32,128,1,float16,fp8,0,3.6910030364990236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,32,128,1,float16,float16,0,4.581612777709961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,4,128,1,fp8,fp8,0,6.770777893066406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,8,128,1,float16,fp8,0,6.772695922851563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,32,128,1,fp8,fp8,0,3.5955631256103517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,8,128,1,fp8,fp8,0,6.6617073059082035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,1,128,1,float16,float16,0,4.0422721862792965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,32,8,128,1,float16,float16,0,8.668479919433594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,1,128,1,float16,fp8,0,3.2763439178466798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,1,128,1,fp8,fp8,0,3.338336181640625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,2,128,1,float16,float16,0,4.142697525024414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,2,128,1,float16,fp8,0,3.333588790893555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,4,128,1,float16,float16,0,4.014198303222656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,4,128,1,float16,fp8,0,3.335224151611328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,4,128,1,fp8,fp8,0,3.349435043334961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,8,128,1,float16,fp8,0,3.3278526306152343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,32,128,1,float16,float16,0,2.226059150695801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,32,128,1,float16,fp8,0,1.9879600524902343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,32,128,1,fp8,fp8,0,1.7777231216430665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,1,128,1,float16,float16,0,1.9184608459472656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,8,128,1,fp8,fp8,0,3.393393707275391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,2,128,1,fp8,fp8,0,3.2706558227539064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,1,128,1,float16,fp8,0,1.660152053833008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,1,128,1,fp8,fp8,0,1.7330976486206056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,2,128,1,float16,fp8,0,1.653156852722168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,2,128,1,float16,float16,0,1.9094367980957032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,2,128,1,fp8,fp8,0,1.7442272186279297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,4,128,1,float16,fp8,0,1.625699234008789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,4,128,1,float16,float16,0,2.022684860229492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,4,128,1,fp8,fp8,0,1.629420852661133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,32,128,1,float16,float16,0,1.0674448013305664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,32,8,128,1,float16,float16,0,4.250780868530273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,8,128,1,float16,fp8,0,1.6395023345947266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,8,128,1,float16,float16,0,2.0144607543945314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,32,128,1,fp8,fp8,0,1.0105567932128907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,1,128,1,float16,float16,0,0.9443679809570312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,1,128,1,float16,fp8,0,0.8879839897155761
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,1,128,1,fp8,fp8,0,0.8199647903442383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,2,128,1,float16,float16,0,0.933011245727539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,2,128,1,float16,fp8,0,0.9322015762329101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,4,128,1,float16,float16,0,0.9636672019958497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,8,128,1,float16,fp8,0,0.8434783935546875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,8,128,1,float16,float16,0,0.9561871528625489
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,32,128,1,float16,fp8,0,0.8901632308959961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,4,128,1,float16,fp8,0,0.8251919746398926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,32,8,128,1,fp8,fp8,0,1.7451360702514649
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,32,128,1,float16,float16,0,0.5532368183135986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,4,128,1,fp8,fp8,0,0.8220512390136718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,8,128,1,fp8,fp8,0,0.82467041015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,32,128,1,float16,fp8,0,0.454966402053833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,1,128,1,float16,fp8,0,0.4243216037750244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,1,128,1,float16,float16,0,0.4744080066680908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,1,128,1,fp8,fp8,0,0.4139279842376709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,32,2,128,1,fp8,fp8,0,0.8843071937561036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,2,128,1,float16,float16,0,0.47771358489990234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,2,128,1,float16,fp8,0,0.4141823768615723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,2,128,1,fp8,fp8,0,0.41897759437561033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,4,128,1,float16,fp8,0,0.41579360961914064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,4,128,1,float16,float16,0,0.48190717697143554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,4,128,1,fp8,fp8,0,0.41626877784729005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,8,128,1,float16,float16,0,0.49295997619628906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,8,128,1,float16,fp8,0,0.417844820022583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,8,128,1,fp8,fp8,0,0.41892638206481936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,32,128,1,float16,float16,0,0.2769936084747314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,32,128,1,float16,fp8,0,0.232590389251709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,32,128,1,fp8,fp8,0,0.2328399896621704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,1,128,1,float16,float16,0,0.24415841102600097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,1,128,1,float16,fp8,0,0.21075038909912108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,1,128,1,fp8,fp8,0,0.21197121143341063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,2,128,1,fp8,fp8,0,0.21322879791259766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,4,128,1,float16,fp8,0,0.2130064010620117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,4,128,1,float16,float16,0,0.24651520252227782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,2,128,1,float16,fp8,0,0.2134783983230591
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,4,128,1,fp8,fp8,0,0.2137808084487915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,8,128,1,float16,fp8,0,0.21386559009552003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,8,128,1,fp8,fp8,0,0.21233599185943602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,8,128,1,float16,float16,0,0.25099198818206786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,32,2,128,1,float16,float16,0,0.24501919746398926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,32,32,128,1,fp8,fp8,0,0.4528160095214844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,1,128,1,float16,fp8,0,3.8347503662109377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,1,128,1,fp8,fp8,0,3.7799728393554686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,1,128,1,float16,float16,0,4.532611083984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,2,128,1,fp8,fp8,0,3.868891143798828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,2,128,1,float16,fp8,0,3.8540576934814452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,2,128,1,float16,float16,0,4.575395202636718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,4,128,1,float16,fp8,0,3.8386959075927733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,4,128,1,float16,float16,0,4.719049453735352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,4,128,1,fp8,fp8,0,3.8688480377197267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,32,128,1,float16,fp8,0,2.1886959075927734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,8,128,1,float16,float16,0,4.883660888671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,1,128,1,float16,float16,0,2.281888008117676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,32,128,1,fp8,fp8,0,2.132659149169922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,1,128,1,float16,fp8,0,1.9230432510375977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,8,128,1,fp8,fp8,0,3.932324981689453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,1,128,1,fp8,fp8,0,1.911729621887207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,2,128,1,float16,float16,0,2.3466447830200194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,32,8,128,1,float16,fp8,0,3.8951473236083984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,2,128,1,fp8,fp8,0,1.9162368774414062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,2,128,1,float16,fp8,0,2.025275230407715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,4,128,1,float16,fp8,0,1.9648176193237306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,4,128,1,fp8,fp8,0,1.929105567932129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,4,128,1,float16,float16,0,2.25842399597168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,32,128,1,float16,float16,0,2.6177024841308594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,8,128,1,float16,float16,0,2.4433135986328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,32,128,1,float16,fp8,0,1.0890815734863282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,8,128,1,float16,fp8,0,1.9451824188232423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,32,128,1,float16,float16,0,1.3197296142578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,32,128,1,fp8,fp8,0,1.0722991943359375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,32,8,128,1,fp8,fp8,0,1.946183967590332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,1,128,1,float16,float16,0,1.1285247802734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,1,128,1,fp8,fp8,0,1.053985595703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,1,128,1,float16,fp8,0,0.9622464179992676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,2,128,1,float16,float16,0,1.109499168395996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,2,128,1,fp8,fp8,0,1.0177184104919434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,4,128,1,float16,fp8,0,0.9880463600158691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,4,128,1,float16,float16,0,1.1246383666992188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,4,128,1,fp8,fp8,0,1.040559959411621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,32,128,1,float16,float16,0,0.6696735858917237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,32,128,1,fp8,fp8,0,0.578656005859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,8,128,1,float16,fp8,0,0.97598876953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,1,128,1,float16,float16,0,0.5832079887390137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,8,128,1,float16,float16,0,1.1425359725952149
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,8,128,1,fp8,fp8,0,1.02009916305542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,1,128,1,float16,fp8,0,0.49094882011413576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,1,128,1,fp8,fp8,0,0.5177919864654541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,2,128,1,float16,fp8,0,0.4970880031585693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,2,128,1,float16,float16,0,0.5566927909851074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,2,128,1,fp8,fp8,0,0.49002561569213865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,4,128,1,float16,float16,0,0.5714128017425537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,4,128,1,float16,fp8,0,0.49058079719543457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,4,128,1,fp8,fp8,0,0.5008880138397217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,8,128,1,float16,fp8,0,0.5155632019042968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,8,128,1,fp8,fp8,0,0.49457921981811526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,32,128,1,float16,float16,0,0.34343039989471436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,32,128,1,float16,fp8,0,0.2762975931167603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,32,128,1,fp8,fp8,0,0.2786144018173218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,1,128,1,float16,float16,0,0.2902368068695068
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,1,128,1,float16,fp8,0,0.24863040447235107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,1,128,1,fp8,fp8,0,0.25988800525665284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,2,128,1,float16,float16,0,0.28569281101226807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,2,128,1,float16,fp8,0,0.24859681129455566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,2,128,1,fp8,fp8,0,0.2595904111862183
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,4,128,1,float16,float16,0,0.28780479431152345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,4,128,1,fp8,fp8,0,0.25055999755859376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,32,2,128,1,float16,fp8,0,0.9642448425292969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,8,128,1,float16,float16,0,0.3008928060531616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,8,128,1,float16,fp8,0,0.2533440113067627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,8,128,1,fp8,fp8,0,0.2529727935791016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,32,128,1,float16,fp8,0,0.14958239793777467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,32,128,1,fp8,fp8,0,0.14420319795608522
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,1,128,1,float16,float16,0,0.14738080501556397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,1,128,1,float16,fp8,0,0.13332959413528442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,1,128,1,fp8,fp8,0,0.1280527949333191
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,2,128,1,float16,float16,0,0.1487023949623108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,2,128,1,float16,fp8,0,0.13186399936676024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,4,128,1,float16,float16,0,0.15032320022583007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,32,128,1,float16,fp8,0,0.5432991981506348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,4,128,1,fp8,fp8,0,0.12947200536727904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,8,128,1,float16,float16,0,0.15308640003204346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,8,128,1,float16,fp8,0,0.13156319856643678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,8,128,1,fp8,fp8,0,0.13244800567626952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,32,8,128,1,float16,float16,0,0.5737711906433105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,32,4,128,1,float16,fp8,0,0.2560703992843628
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,32,128,1,float16,float16,0,0.1774943947792053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,2,128,1,fp8,fp8,0,0.1284719944000244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,32,4,128,1,float16,fp8,0,0.12971359491348267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,1,128,1,float16,fp8,0,3.767776107788086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,1,128,1,float16,float16,0,4.5817710876464846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,1,128,1,fp8,fp8,0,3.769196701049805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,2,128,1,float16,fp8,0,3.734449768066406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,2,128,1,float16,float16,0,4.572623825073242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,2,128,1,fp8,fp8,0,3.7458961486816404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,4,128,1,float16,float16,0,4.590974426269531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,4,128,1,float16,fp8,0,3.78524169921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,4,128,1,fp8,fp8,0,3.9017391204833984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,8,128,1,float16,fp8,0,3.827449417114258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,8,128,1,float16,float16,0,4.809928131103516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,32,128,1,float16,fp8,0,2.160441589355469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,32,128,1,float16,float16,0,2.826532745361328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,1,128,1,float16,float16,0,2.1795440673828126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,32,128,1,fp8,fp8,0,2.217420768737793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,32,8,128,1,fp8,fp8,0,3.8505023956298827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,1,128,1,float16,fp8,0,1.881368064880371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,1,128,1,fp8,fp8,0,1.8819087982177733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,2,128,1,float16,fp8,0,1.8832431793212892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,2,128,1,fp8,fp8,0,1.9854127883911132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,2,128,1,float16,float16,0,2.3203311920166017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,4,128,1,float16,fp8,0,1.897208023071289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,4,128,1,fp8,fp8,0,1.9070608139038085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,4,128,1,float16,float16,0,2.3268287658691404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,8,128,1,float16,float16,0,2.3575807571411134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,32,128,1,float16,fp8,0,1.156049633026123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,32,128,1,float16,float16,0,1.335904026031494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,8,128,1,fp8,fp8,0,1.9157472610473634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,32,128,1,fp8,fp8,0,1.084113597869873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,1,128,1,float16,float16,0,1.0796624183654786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,1,128,1,float16,fp8,0,1.0319503784179687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,1,128,1,fp8,fp8,0,1.0480336189270019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,2,128,1,float16,float16,0,1.0914928436279296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,2,128,1,fp8,fp8,0,0.9448368072509765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,2,128,1,float16,fp8,0,1.0330639839172364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,4,128,1,float16,fp8,0,0.9700736045837403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,4,128,1,float16,float16,0,1.0911824226379394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,32,128,1,float16,float16,0,0.6837103843688965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,32,128,1,float16,fp8,0,0.5531392097473145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,8,128,1,float16,float16,0,1.1487728118896485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,32,128,1,fp8,fp8,0,0.5989391803741455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,8,128,1,float16,fp8,0,0.9883407592773438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,8,128,1,fp8,fp8,0,0.9629343986511231
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,1,128,1,float16,fp8,0,0.4801680088043213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,1,128,1,fp8,fp8,0,0.49361600875854494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,1,128,1,float16,float16,0,0.5486303806304932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,2,128,1,float16,float16,0,0.5637648105621338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,2,128,1,fp8,fp8,0,0.4777167797088623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,32,8,128,1,float16,fp8,0,1.9159263610839843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,4,128,1,float16,float16,0,0.5520304203033447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,4,128,1,fp8,fp8,0,0.4910240173339844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,4,128,1,float16,fp8,0,0.48867359161376955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,8,128,1,float16,fp8,0,0.4876688003540039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,8,128,1,float16,float16,0,0.5730800151824951
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,8,128,1,fp8,fp8,0,0.49137282371520996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,32,128,1,float16,float16,0,0.34024639129638673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,32,128,1,float16,fp8,0,0.28273279666900636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,1,128,1,fp8,fp8,0,0.24626400470733642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,2,128,1,float16,float16,0,0.27857921123504636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,2,128,1,float16,fp8,0,0.24524800777435302
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,2,128,1,fp8,fp8,0,0.24655520915985107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,4,128,1,float16,float16,0,0.28188159465789797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,4,128,1,float16,fp8,0,0.24539039134979249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,32,4,128,1,fp8,fp8,0,0.974505615234375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,4,128,1,fp8,fp8,0,0.24749279022216797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,8,128,1,float16,float16,0,0.289247989654541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,8,128,1,float16,fp8,0,0.24877440929412842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,32,128,1,float16,float16,0,0.17766720056533813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,8,128,1,fp8,fp8,0,0.2514944076538086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,32,128,1,float16,fp8,0,0.14905439615249633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,32,128,1,fp8,fp8,0,0.14789919853210448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,1,128,1,float16,float16,0,0.14313119649887085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,1,128,1,float16,fp8,0,0.12749439477920532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,1,128,1,fp8,fp8,0,0.12770559787750244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,2,128,1,float16,float16,0,0.14410400390625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,2,128,1,fp8,fp8,0,0.12813440561294556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,4,128,1,float16,float16,0,0.14678239822387695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,4,128,1,float16,fp8,0,0.12929439544677734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,4,128,1,fp8,fp8,0,0.1294543981552124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,8,128,1,float16,float16,0,0.15113279819488526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,8,128,1,float16,fp8,0,0.13137600421905518
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,8,128,1,fp8,fp8,0,0.13126399517059326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,32,128,1,float16,float16,0,0.09387360215187072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,32,128,1,float16,fp8,0,0.08031039834022521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,32,128,1,fp8,fp8,0,0.08002399802207946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,1,128,1,float16,float16,0,0.07397440075874329
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,1,128,1,float16,fp8,0,0.06691840291023254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,1,128,1,fp8,fp8,0,0.06684640049934387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,2,128,1,float16,float16,0,0.07474079728126526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,2,128,1,float16,fp8,0,0.06700320243835449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,1,128,1,float16,fp8,0,0.2538928031921387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,2,128,1,fp8,fp8,0,0.06690080165863037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,4,128,1,float16,float16,0,0.07659519910812378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,4,128,1,float16,fp8,0,0.06765120029449463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,8,128,1,float16,float16,0,0.08154240250587463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,8,128,1,float16,fp8,0,0.068367999792099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,8,128,1,fp8,fp8,0,0.06747199892997742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,32,2,128,1,float16,fp8,0,0.1286352038383484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,32,2,128,1,float16,fp8,0,0.4868288040161133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,1,128,1,float16,fp8,0,2.332276725769043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,1,128,1,float16,float16,0,2.5729808807373047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,1,128,1,float16,float16,0,0.27531359195709226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,32,4,128,1,fp8,fp8,0,0.06741920113563538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,1,128,1,fp8,fp8,0,2.303759956359863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,2,128,1,float16,float16,0,2.593547248840332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,2,128,1,float16,fp8,0,2.3362400054931642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,2,128,1,fp8,fp8,0,2.3836816787719726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,4,128,1,float16,fp8,0,2.3445600509643554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,4,128,1,float16,float16,0,2.746148872375488
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,4,128,1,fp8,fp8,0,2.323796844482422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,32,32,128,1,fp8,fp8,0,0.2847791910171509
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,8,128,1,float16,fp8,0,2.3717424392700197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,8,128,1,fp8,fp8,0,2.381292724609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,32,128,1,float16,float16,0,1.6651071548461913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,32,8,128,1,float16,float16,0,2.925815963745117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,1,128,1,float16,fp8,0,1.1589776039123536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,32,128,1,fp8,fp8,0,1.376352024078369
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,1,128,1,float16,float16,0,1.2903823852539062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,1,128,1,fp8,fp8,0,1.1589712142944335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,2,128,1,float16,float16,0,1.3044560432434082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,2,128,1,float16,fp8,0,1.2399120330810547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,4,128,1,float16,fp8,0,1.1704863548278808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,4,128,1,fp8,fp8,0,1.1756896018981933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,4,128,1,float16,float16,0,1.3307120323181152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,8,128,1,float16,float16,0,1.385955238342285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,32,128,1,float16,fp8,0,0.6977071762084961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,32,128,1,fp8,fp8,0,0.7227647781372071
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,32,128,1,float16,float16,0,0.8409695625305176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,8,128,1,fp8,fp8,0,1.207953643798828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,1,128,1,float16,fp8,0,0.5863327980041504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,1,128,1,float16,float16,0,0.6778336048126221
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,32,128,1,float16,fp8,0,1.4939087867736816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,1,128,1,fp8,fp8,0,0.6067071914672851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,2,128,1,float16,float16,0,0.6507679939270019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,2,128,1,fp8,fp8,0,0.5864463806152344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,4,128,1,float16,float16,0,0.6938208103179931
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,4,128,1,float16,fp8,0,0.5927472114562988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,4,128,1,fp8,fp8,0,0.6119120121002197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,8,128,1,float16,float16,0,0.6856336116790771
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,8,128,1,float16,fp8,0,0.6013008117675781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,32,128,1,float16,fp8,0,0.3581455945968628
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,32,128,1,float16,float16,0,0.4283760070800781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,8,128,1,fp8,fp8,0,0.6292031764984131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,2,128,1,fp8,fp8,0,1.2579615592956543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,1,128,1,float16,float16,0,0.3324608087539673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,1,128,1,float16,fp8,0,0.2990128040313721
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,1,128,1,fp8,fp8,0,0.2983743906021118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,2,128,1,float16,float16,0,0.34190878868103025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,2,128,1,float16,fp8,0,0.29754080772399905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,2,128,1,fp8,fp8,0,0.2987152099609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,4,128,1,float16,float16,0,0.34122400283813475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,4,128,1,float16,fp8,0,0.30209760665893554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,4,128,1,fp8,fp8,0,0.3047760009765625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,8,128,1,float16,fp8,0,0.3065984010696411
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,32,8,128,1,float16,fp8,0,1.2652496337890624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,32,128,1,float16,fp8,0,0.18708800077438353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,32,128,1,fp8,fp8,0,0.18534719944000244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,1,128,1,float16,float16,0,0.17244160175323486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,1,128,1,float16,fp8,0,0.15534720420837403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,1,128,1,fp8,fp8,0,0.1549343943595886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,2,128,1,float16,float16,0,0.17371679544448854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,2,128,1,fp8,fp8,0,0.15501279830932618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,4,128,1,float16,float16,0,0.17717280387878417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,32,2,128,1,float16,fp8,0,0.5918335914611816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,4,128,1,float16,fp8,0,0.1564352035522461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,4,128,1,fp8,fp8,0,0.15645760297775269
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,8,128,1,float16,float16,0,0.18352160453796387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,8,128,1,fp8,fp8,0,0.15942879915237426
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,32,128,1,float16,float16,0,0.11756160259246826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,32,128,1,float16,fp8,0,0.09997280240058899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,32,128,1,fp8,fp8,0,0.0997983992099762
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,1,128,1,float16,float16,0,0.09156479835510253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,1,128,1,float16,fp8,0,0.08101119995117187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,1,128,1,fp8,fp8,0,0.08042240142822266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,2,128,1,float16,float16,0,0.09278720021247863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,2,128,1,float16,fp8,0,0.08125439882278443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,32,128,1,fp8,fp8,0,0.3581552028656006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,2,128,1,fp8,fp8,0,0.08084800243377685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,4,128,1,float16,float16,0,0.09412159919738769
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,4,128,1,float16,fp8,0,0.08171520233154297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,4,128,1,fp8,fp8,0,0.08235999941825867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,8,128,1,float16,float16,0,0.09724640250205993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,8,128,1,float16,fp8,0,0.0846127986907959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,32,8,128,1,fp8,fp8,0,0.08447359800338745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,32,128,1,float16,float16,0,0.06473119854927063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,8,128,1,float16,float16,0,0.35067360401153563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,32,128,1,fp8,fp8,0,0.054262399673461914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,32,8,128,1,fp8,fp8,0,0.3064160108566284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,32,128,1,float16,float16,0,0.2217871904373169
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,1,128,1,float16,fp8,0,0.044572800397872925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,1,128,1,fp8,fp8,0,0.04465599954128265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,2,128,1,float16,fp8,0,0.04456160068511963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,2,128,1,float16,float16,0,0.048742398619651794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,2,128,1,fp8,fp8,0,0.04492799937725067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,4,128,1,float16,float16,0,0.049395200610160825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,4,128,1,fp8,fp8,0,0.04500479996204376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,8,128,1,float16,float16,0,0.0517520010471344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,2,128,1,float16,fp8,0,0.15665760040283203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,8,128,1,float16,fp8,0,0.045495998859405515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,32,8,128,1,float16,fp8,0,0.15929440259933472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,32,128,1,float16,fp8,0,0.053667199611663816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,1,128,1,float16,float16,0,2.721507263183594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,2,128,1,float16,float16,0,2.7295488357543944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,1,128,1,float16,fp8,0,2.4503440856933594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,1,128,1,float16,float16,0,0.04853920042514801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,1,128,1,fp8,fp8,0,2.4188432693481445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,4,128,1,float16,fp8,0,0.04488799870014191
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,32,8,128,1,fp8,fp8,0,0.04563679993152618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,2,128,1,fp8,fp8,0,2.530232048034668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,4,128,1,float16,fp8,0,2.437321662902832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,4,128,1,float16,float16,0,2.8399824142456054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,4,128,1,fp8,fp8,0,2.451945686340332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,8,128,1,fp8,fp8,0,2.5078655242919923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,8,128,1,float16,fp8,0,2.5088432312011717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,2,128,1,float16,fp8,0,2.4626079559326173
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,32,128,1,float16,float16,0,1.8303199768066407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,1,128,1,float16,float16,0,1.316923236846924
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,32,128,1,float16,fp8,0,1.5018943786621093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,1,128,1,float16,fp8,0,1.2175647735595703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,1,128,1,fp8,fp8,0,1.2220447540283204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,2,128,1,float16,float16,0,1.3448575973510741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,2,128,1,float16,fp8,0,1.2661744117736817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,2,128,1,fp8,fp8,0,1.2705792427062987
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,4,128,1,float16,float16,0,1.3785840034484864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,4,128,1,float16,fp8,0,1.2315936088562012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,4,128,1,fp8,fp8,0,1.2383359909057616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,32,8,128,1,float16,float16,0,2.9369903564453126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,32,128,1,float16,float16,0,0.9344736099243164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,8,128,1,float16,float16,0,1.4667856216430664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,32,128,1,float16,fp8,0,0.7614863872528076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,32,128,1,fp8,fp8,0,0.7629183769226074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,8,128,1,fp8,fp8,0,1.2762479782104492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,1,128,1,float16,float16,0,0.6755983829498291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,1,128,1,fp8,fp8,0,0.6386640071868896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,2,128,1,float16,fp8,0,0.6164031982421875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,2,128,1,fp8,fp8,0,0.6196608066558837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,2,128,1,float16,float16,0,0.673803186416626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,32,128,1,fp8,fp8,0,1.6303375244140625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,4,128,1,float16,float16,0,0.6968480110168457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,4,128,1,float16,fp8,0,0.6403920173645019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,4,128,1,fp8,fp8,0,0.6235792160034179
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,8,128,1,float16,fp8,0,0.6364783763885498
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,8,128,1,fp8,fp8,0,0.6412720203399658
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,32,128,1,float16,float16,0,0.47008318901062013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,32,128,1,fp8,fp8,0,0.3883359909057617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,1,128,1,float16,float16,0,0.3397664070129395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,1,128,1,fp8,fp8,0,0.3136224031448364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,2,128,1,float16,float16,0,0.3438271999359131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,2,128,1,float16,fp8,0,0.31295199394226075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,2,128,1,fp8,fp8,0,0.3137471914291382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,4,128,1,float16,float16,0,0.3501343965530396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,4,128,1,float16,fp8,0,0.3169680118560791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,32,8,128,1,float16,fp8,0,1.2797375679016114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,4,128,1,fp8,fp8,0,0.3178159952163696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,8,128,1,float16,float16,0,0.3651024103164673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,8,128,1,float16,fp8,0,0.32362239360809325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,32,128,1,float16,float16,0,0.24171679019927977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,32,128,1,float16,fp8,0,0.20016000270843506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,32,128,1,fp8,fp8,0,0.2006335973739624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,1,128,1,float16,float16,0,0.17619999647140502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,1,128,1,float16,fp8,0,0.16118719577789306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,1,128,1,fp8,fp8,0,0.16103359460830688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,2,128,1,float16,fp8,0,0.16162559986114503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,4,128,1,float16,float16,0,0.181768000125885
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,4,128,1,float16,fp8,0,0.16318080425262452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,8,128,1,float16,float16,0,0.7246416091918946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,4,128,1,fp8,fp8,0,0.16326719522476196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,32,128,1,float16,fp8,0,0.3934895992279053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,8,128,1,float16,float16,0,0.18988159894943238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,8,128,1,float16,fp8,0,0.1672335982322693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,32,128,1,float16,float16,0,0.12776639461517333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,32,128,1,float16,fp8,0,0.1060256004333496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,32,128,1,fp8,fp8,0,0.10675359964370727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,1,128,1,float16,fp8,0,0.31442880630493164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,1,128,1,float16,fp8,0,0.08615840077400208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,1,128,1,fp8,fp8,0,0.08628479838371277
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,2,128,1,float16,float16,0,0.09540479779243469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,2,128,1,float16,fp8,0,0.08696799874305725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,2,128,1,fp8,fp8,0,0.08685439825057983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,4,128,1,float16,float16,0,0.09721919894218445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,4,128,1,float16,fp8,0,0.087772798538208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,4,128,1,fp8,fp8,0,0.0879151999950409
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,32,8,128,1,fp8,fp8,0,0.32764639854431155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,8,128,1,float16,float16,0,0.10090559720993042
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,8,128,1,float16,fp8,0,0.08969280123710632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,32,1,128,1,float16,fp8,0,0.63711519241333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,32,128,1,float16,float16,0,0.06950719952583313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,32,128,1,float16,fp8,0,0.05813599824905395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,2,128,1,fp8,fp8,0,0.161246395111084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,1,128,1,float16,float16,0,0.04852479994297028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,1,128,1,float16,fp8,0,0.045505601167678836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,1,128,1,fp8,fp8,0,0.04596799910068512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,2,128,1,float16,float16,0,0.048902401328086854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,2,128,1,float16,fp8,0,0.04595839977264404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,2,128,1,fp8,fp8,0,0.04566240012645721
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,4,128,1,float16,fp8,0,0.046060800552368164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,4,128,1,fp8,fp8,0,0.04593279957771301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,8,128,1,float16,float16,0,0.05488319993019104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,8,128,1,float16,fp8,0,0.04652000069618225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,8,128,1,fp8,fp8,0,0.16823840141296387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,8,128,1,fp8,fp8,0,0.04635039865970612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,1,128,1,float16,float16,0,0.09296640157699584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,32,128,1,fp8,fp8,0,0.030099201202392577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,1,128,1,float16,float16,0,0.028214401006698607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,1,128,1,float16,fp8,0,0.02741119861602783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,1,128,1,fp8,fp8,0,0.027382400631904603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,2,128,1,float16,float16,0,0.02855840027332306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,2,128,1,float16,fp8,0,0.027515199780464173
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,2,128,1,fp8,fp8,0,0.027478399872779845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,4,128,1,float16,float16,0,0.028780800104141236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,4,128,1,float16,fp8,0,0.027687999606132507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,4,128,1,fp8,fp8,0,0.027735999226570128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,8,128,1,float16,float16,0,0.02924799919128418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,8,128,1,float16,fp8,0,0.02773280143737793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,32,8,128,1,fp8,fp8,0,0.08936960101127625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,8,128,1,fp8,fp8,0,0.027876800298690795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,32,2,128,1,float16,float16,0,0.17796800136566163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,32,128,1,fp8,fp8,0,0.058641600608825686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,32,4,128,1,float16,float16,0,0.050995200872421265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,32,128,1,float16,float16,0,0.03830240070819855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,1,128,1,float16,fp8,0,1.8136863708496094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,1,128,1,float16,float16,0,1.8603744506835938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,1,128,1,fp8,fp8,0,1.8147375106811523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,2,128,1,float16,float16,0,1.8941968917846679
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,2,128,1,float16,fp8,0,1.7937824249267578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,2,128,1,fp8,fp8,0,1.8053232192993165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,4,128,1,float16,fp8,0,1.8392080307006835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,4,128,1,float16,float16,0,1.9564800262451172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,4,128,1,fp8,fp8,0,1.8354415893554688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,8,128,1,float16,float16,0,2.0981184005737306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,8,128,1,float16,fp8,0,1.8797792434692382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,32,32,128,1,float16,fp8,0,0.030004799365997314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,1,128,1,float16,float16,0,0.9350959777832031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,32,128,1,float16,float16,0,1.4354304313659667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,32,128,1,float16,fp8,0,1.2244112014770507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,32,128,1,fp8,fp8,0,1.22640962600708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,1,128,1,float16,fp8,0,0.9247424125671386
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,32,8,128,1,fp8,fp8,0,1.8911455154418946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,1,128,1,fp8,fp8,0,0.9027615547180176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,2,128,1,float16,float16,0,0.9444304466247558
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,2,128,1,float16,fp8,0,0.9099136352539062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,2,128,1,fp8,fp8,0,0.908892822265625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,4,128,1,float16,float16,0,0.9804032325744629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,4,128,1,float16,fp8,0,0.9239808082580566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,4,128,1,fp8,fp8,0,0.9339584350585938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,8,128,1,float16,float16,0,1.0424768447875976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,8,128,1,float16,fp8,0,0.9460224151611328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,32,128,1,float16,float16,0,0.7253967761993408
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,1,128,1,float16,float16,0,0.4747727870941162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,32,8,128,1,fp8,fp8,0,0.9506575584411621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,1,128,1,fp8,fp8,0,0.45952157974243163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,2,128,1,float16,float16,0,0.47604961395263673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,2,128,1,float16,fp8,0,0.4604032039642334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,2,128,1,fp8,fp8,0,0.46092000007629397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,4,128,1,float16,float16,0,0.4952976226806641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,4,128,1,float16,fp8,0,0.4683519840240479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,4,128,1,fp8,fp8,0,0.4637712001800537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,8,128,1,float16,float16,0,0.5274831771850585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,8,128,1,fp8,fp8,0,0.4813839912414551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,8,128,1,float16,fp8,0,0.48284640312194826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,32,128,1,float16,fp8,0,0.31434719562530516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,32,128,1,fp8,fp8,0,0.31542720794677737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,1,128,1,float16,float16,0,0.24290719032287597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,1,128,1,fp8,fp8,0,0.23347840309143067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,2,128,1,float16,float16,0,0.24618399143218994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,2,128,1,float16,fp8,0,0.2349071979522705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,2,128,1,fp8,fp8,0,0.23356320858001708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,4,128,1,float16,float16,0,0.25388801097869873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,4,128,1,float16,fp8,0,0.23950400352478027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,4,128,1,fp8,fp8,0,0.23783841133117675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,8,128,1,float16,float16,0,0.2708447933197021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,8,128,1,float16,fp8,0,0.24721601009368896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,8,128,1,fp8,fp8,0,0.2452415943145752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,32,128,1,float16,float16,0,0.19217439889907836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,32,128,1,float16,fp8,0,0.16423200368881224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,32,128,1,fp8,fp8,0,0.16293760538101196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,1,128,1,float16,float16,0,0.1276368021965027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,1,128,1,float16,fp8,0,0.12185759544372558
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,32,128,1,float16,fp8,0,0.619547176361084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,1,128,1,float16,fp8,0,0.46544637680053713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,1,128,1,fp8,fp8,0,0.12287520170211792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,2,128,1,float16,float16,0,0.12902239561080933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,2,128,1,float16,fp8,0,0.12334400415420532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,4,128,1,float16,float16,0,0.13470560312271118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,4,128,1,fp8,fp8,0,0.12532639503479004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,8,128,1,float16,float16,0,0.14155999422073365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,8,128,1,float16,fp8,0,0.12942880392074585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,8,128,1,fp8,fp8,0,0.12945280075073243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,32,128,1,float16,float16,0,0.10310560464859009
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,32,128,1,float16,fp8,0,0.08843200206756592
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,1,128,1,float16,float16,0,0.07027040123939514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,32,128,1,float16,float16,0,0.3716912031173706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,1,128,1,fp8,fp8,0,0.06628000140190124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,32,1,128,1,float16,fp8,0,0.23571999073028566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,2,128,1,float16,float16,0,0.0706928014755249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,2,128,1,float16,fp8,0,0.06736000180244446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,2,128,1,fp8,fp8,0,0.06696799993515015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,4,128,1,float16,float16,0,0.0731216013431549
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,4,128,1,float16,fp8,0,0.06786239743232728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,4,128,1,fp8,fp8,0,0.0685696005821228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,8,128,1,float16,float16,0,0.07744479775428773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,8,128,1,float16,fp8,0,0.07018399834632874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,8,128,1,fp8,fp8,0,0.07032480239868164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,32,128,1,float16,float16,0,0.05821920037269592
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,32,128,1,float16,fp8,0,0.050569599866867064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,32,128,1,fp8,fp8,0,0.05055040121078491
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,1,128,1,float16,float16,0,0.03588959872722626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,1,128,1,float16,fp8,0,0.035806399583816526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,1,128,1,fp8,fp8,0,0.035990399122238156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,2,128,1,float16,float16,0,0.036652800440788266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,2,128,1,float16,fp8,0,0.03585920035839081
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,2,128,1,fp8,fp8,0,0.03573279976844788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,4,128,1,float16,float16,0,0.038577601313591
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,4,128,1,float16,fp8,0,0.036103999614715575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,4,128,1,fp8,fp8,0,0.03584960103034973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,8,128,1,float16,float16,0,0.04364959895610809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,8,128,1,float16,fp8,0,0.03691360056400299
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,32,8,128,1,fp8,fp8,0,0.03663040101528168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,32,32,128,1,fp8,fp8,0,0.6186495780944824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,32,128,1,float16,float16,0,0.03472639918327332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,32,128,1,float16,fp8,0,0.028516799211502075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,32,128,1,fp8,fp8,0,0.028464001417160035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,1,128,1,float16,fp8,0,0.022459200024604796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,1,128,1,float16,float16,0,0.021798400580883025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,2,128,1,fp8,fp8,0,0.12452000379562378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,32,4,128,1,float16,fp8,0,0.12448320388793946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,2,128,1,float16,fp8,0,0.0224031999707222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,2,128,1,fp8,fp8,0,0.022732800245285033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,4,128,1,float16,float16,0,0.022521600127220154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,4,128,1,float16,fp8,0,0.02266079932451248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,4,128,1,fp8,fp8,0,0.022742399573326112
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,8,128,1,float16,fp8,0,0.02290399968624115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,8,128,1,fp8,fp8,0,0.022974400222301482
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,32,128,1,float16,float16,0,0.01963520050048828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,32,128,1,float16,fp8,0,0.018875199556350707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,32,128,1,fp8,fp8,0,0.01899999976158142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,1,128,1,float16,float16,0,0.01719360053539276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,1,128,1,float16,fp8,0,0.017238399386405943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,1,128,1,fp8,fp8,0,0.0173007994890213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,2,128,1,float16,float16,0,0.017127999663352968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,2,128,1,float16,fp8,0,0.017449599504470826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,2,128,1,fp8,fp8,0,0.017153599858283998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,4,128,1,float16,float16,0,0.017280000448226928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,4,128,1,float16,fp8,0,0.017371200025081635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,4,128,1,fp8,fp8,0,0.017422400414943695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,8,128,1,float16,fp8,0,0.01753759980201721
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,8,128,1,fp8,fp8,0,0.017508800327777862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,1,128,1,float16,float16,0,0.7450496196746826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,1,128,1,float16,fp8,0,0.7557199954986572
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,1,128,1,fp8,fp8,0,0.02240640074014664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,2,128,1,float16,float16,0,0.02205439954996109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,32,8,128,1,float16,float16,0,0.022841599583625794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,32,128,1,fp8,fp8,0,0.08871039748191833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,1,128,1,fp8,fp8,0,0.7559072017669678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,2,128,1,float16,float16,0,0.7511695861816406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,2,128,1,float16,fp8,0,0.7520031929016113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,32,1,128,1,float16,fp8,0,0.0662064015865326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,32,8,128,1,float16,float16,0,0.01757120043039322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,2,128,1,fp8,fp8,0,0.7467999935150147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,4,128,1,float16,float16,0,0.7849040031433105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,4,128,1,float16,fp8,0,0.7702799797058105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,4,128,1,fp8,fp8,0,0.7644815921783448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,8,128,1,float16,float16,0,0.8479760169982911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,8,128,1,float16,fp8,0,0.7993984222412109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,32,128,1,float16,float16,0,0.6367983818054199
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,32,8,128,1,fp8,fp8,0,0.7949120044708252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,32,128,1,fp8,fp8,0,0.5265503883361816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,1,128,1,float16,fp8,0,0.3828320026397705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,1,128,1,fp8,fp8,0,0.38422560691833496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,2,128,1,float16,float16,0,0.3800175905227661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,2,128,1,float16,fp8,0,0.3823296070098877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,2,128,1,fp8,fp8,0,0.3845792055130005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,4,128,1,float16,float16,0,0.4006783962249756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,4,128,1,float16,fp8,0,0.39187839031219485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,4,128,1,fp8,fp8,0,0.3863568067550659
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,8,128,1,float16,float16,0,0.4295040130615234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,32,128,1,float16,float16,0,0.3277951955795288
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,8,128,1,fp8,fp8,0,0.40543198585510254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,32,128,1,float16,fp8,0,0.2695280075073242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,1,128,1,float16,fp8,0,0.1977936029434204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,32,128,1,fp8,fp8,0,0.2701280117034912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,1,128,1,fp8,fp8,0,0.19751039743423462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,2,128,1,float16,float16,0,0.19994560480117798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,2,128,1,float16,fp8,0,0.19803199768066407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,2,128,1,fp8,fp8,0,0.19856319427490235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,4,128,1,float16,float16,0,0.20692160129547119
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,4,128,1,float16,fp8,0,0.2020848035812378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,4,128,1,fp8,fp8,0,0.2024463891983032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,32,128,1,float16,fp8,0,0.5241151809692383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,8,128,1,float16,float16,0,0.22229599952697754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,1,128,1,float16,float16,0,0.3765120029449463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,8,128,1,float16,fp8,0,0.2094208002090454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,32,128,1,float16,fp8,0,0.14124000072479248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,32,128,1,fp8,fp8,0,0.1418447971343994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,8,128,1,fp8,fp8,0,0.20818719863891602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,1,128,1,float16,float16,0,0.10383199453353882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,1,128,1,float16,fp8,0,0.10480159521102905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,1,128,1,fp8,fp8,0,0.10414079427719117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,2,128,1,float16,float16,0,0.10547840595245361
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,2,128,1,float16,fp8,0,0.10525120496749878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,2,128,1,fp8,fp8,0,0.1041856050491333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,32,8,128,1,float16,fp8,0,0.40660481452941893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,4,128,1,float16,fp8,0,0.10702719688415527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,4,128,1,fp8,fp8,0,0.10706720352172852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,8,128,1,float16,float16,0,0.1171231985092163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,32,128,1,float16,float16,0,0.09263039827346801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,8,128,1,float16,fp8,0,0.1111631989479065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,32,128,1,fp8,fp8,0,0.07685120105743408
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,1,128,1,float16,float16,0,0.05817919969558716
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,1,128,1,float16,fp8,0,0.057601600885391235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,2,128,1,float16,float16,0,0.05928639769554138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,32,1,128,1,float16,float16,0,0.1958896040916443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,2,128,1,float16,fp8,0,0.058140802383422854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,2,128,1,fp8,fp8,0,0.05797759890556335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,4,128,1,float16,float16,0,0.06146720051765442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,4,128,1,float16,fp8,0,0.05896160006523132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,8,128,1,float16,float16,0,0.0652895987033844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,8,128,1,float16,fp8,0,0.06077759861946106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,8,128,1,fp8,fp8,0,0.06081600189208984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,32,128,1,float16,float16,0,0.05285919904708862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,32,128,1,float16,fp8,0,0.04488799870014191
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,32,128,1,fp8,fp8,0,0.04495840072631836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,32,128,1,float16,float16,0,0.17071839570999145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,1,128,1,float16,float16,0,0.03187200129032135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,1,128,1,float16,fp8,0,0.031620800495147705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,1,128,1,fp8,fp8,0,0.0314736008644104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,2,128,1,float16,float16,0,0.03237600028514862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,2,128,1,float16,fp8,0,0.031611201167106626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,2,128,1,fp8,fp8,0,0.03097440004348755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,4,128,1,float16,float16,0,0.034052801132202146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,4,128,1,float16,fp8,0,0.031737598776817325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,4,128,1,fp8,fp8,0,0.031145599484443665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,8,128,1,float16,fp8,0,0.032267200946807864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,8,128,1,fp8,fp8,0,0.031939199566841124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,32,128,1,float16,float16,0,0.030144000053405763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,32,128,1,float16,fp8,0,0.023736000061035156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,32,128,1,fp8,fp8,0,0.023902399837970732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,1,128,1,float16,float16,0,0.020127999782562255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,1,128,1,float16,fp8,0,0.020632000267505647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,1,128,1,fp8,fp8,0,0.02046400010585785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,1,128,1,fp8,fp8,0,0.057740801572799684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,2,128,1,float16,float16,0,0.020491200685501098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,2,128,1,float16,fp8,0,0.020563200116157532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,4,128,1,float16,float16,0,0.020588800311088562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,4,128,1,float16,fp8,0,0.020710399746894835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,4,128,1,fp8,fp8,0,0.02084160000085831
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,8,128,1,float16,float16,0,0.021081599593162536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,8,128,1,float16,fp8,0,0.021011200547218323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,8,128,1,fp8,fp8,0,0.020769600570201874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,4,128,1,fp8,fp8,0,0.05890240073204041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,32,128,1,float16,float16,0,0.01666239947080612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,32,128,1,fp8,fp8,0,0.015960000455379486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,1,128,1,float16,float16,0,0.01435679942369461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,1,128,1,float16,fp8,0,0.01441120058298111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,1,128,1,fp8,fp8,0,0.01436000019311905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,2,128,1,float16,float16,0,0.014243200421333313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,2,128,1,float16,fp8,0,0.014535999298095703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,2,128,1,fp8,fp8,0,0.014486399292945863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,4,128,1,float16,float16,0,0.014430400729179383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,4,128,1,float16,fp8,0,0.014560000598430633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,4,128,1,fp8,fp8,0,0.014305600523948669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,8,128,1,float16,float16,0,0.014727999269962311
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,8,128,1,float16,fp8,0,0.014500799775123595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,8,128,1,fp8,fp8,0,0.014343999326229095
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,32,128,1,float16,float16,0,0.01531040072441101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,32,128,1,float16,fp8,0,0.014899200201034546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,32,128,1,fp8,fp8,0,0.015158399939537048
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,1,128,1,float16,float16,0,0.013631999492645264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,1,128,1,float16,fp8,0,0.013963200151920319
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,1,128,1,fp8,fp8,0,0.013920000195503235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,2,128,1,float16,float16,0,0.013748799264431
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,2,128,1,float16,fp8,0,0.014207999408245086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,2,128,1,fp8,fp8,0,0.014028799533843995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,4,128,1,float16,float16,0,0.013902400434017182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,4,128,1,float16,fp8,0,0.01408800035715103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,4,128,1,fp8,fp8,0,0.01419519931077957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,8,128,1,float16,float16,0,0.013979199528694152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,8,128,1,float16,fp8,0,0.014256000518798828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,32,8,128,1,fp8,fp8,0,0.014203199744224548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,4,128,1,float16,float16,0,0.11027679443359376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,32,8,128,1,float16,float16,0,0.03784320056438446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,1,128,1,float16,float16,0,0.3557136058807373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,32,8,128,1,fp8,fp8,0,0.11100640296936035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,32,32,128,1,float16,fp8,0,0.0772816002368927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,1,128,1,fp8,fp8,0,0.3699887990951538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,2,128,1,float16,float16,0,0.36102240085601806
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,32,2,128,1,fp8,fp8,0,0.020615999400615693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,2,128,1,fp8,fp8,0,0.36914079189300536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,2,128,1,float16,fp8,0,0.36786561012268065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,4,128,1,float16,float16,0,0.3755408048629761
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,32,32,128,1,float16,fp8,0,0.015564799308776855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,4,128,1,float16,fp8,0,0.3813103914260864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,4,128,1,fp8,fp8,0,0.3835056066513062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,8,128,1,float16,float16,0,0.40296001434326173
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,32,128,1,float16,float16,0,0.31242079734802247
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,8,128,1,fp8,fp8,0,0.4022928237915039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,32,128,1,float16,fp8,0,0.27104640007019043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,1,128,1,float16,float16,0,0.18313599824905397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,32,128,1,fp8,fp8,0,0.27011520862579347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,1,128,1,float16,fp8,0,0.18755040168762208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,1,128,1,fp8,fp8,0,0.18960000276565553
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,2,128,1,float16,float16,0,0.1852512001991272
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,2,128,1,float16,fp8,0,0.19090559482574462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,2,128,1,fp8,fp8,0,0.190283203125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,4,128,1,float16,float16,0,0.1928015947341919
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,4,128,1,float16,fp8,0,0.19491039514541625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,4,128,1,fp8,fp8,0,0.19729440212249755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,8,128,1,float16,float16,0,0.20790560245513917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,8,128,1,float16,fp8,0,0.20662400722503663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,32,8,128,1,fp8,fp8,0,0.20773921012878419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,32,128,1,float16,float16,0,0.16605440378189087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,32,128,1,float16,fp8,0,0.14054399728775024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,1,128,1,float16,float16,0,0.09728000164031983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,1,128,1,fp8,fp8,0,0.102183997631073
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,2,128,1,float16,float16,0,0.0978879988193512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,2,128,1,float16,fp8,0,0.10212479829788208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,2,128,1,fp8,fp8,0,0.10232959985733033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,4,128,1,float16,float16,0,0.10236799716949463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,4,128,1,float16,fp8,0,0.10466079711914063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,4,128,1,fp8,fp8,0,0.1050271987915039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,8,128,1,float16,float16,0,0.10986239910125732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,8,128,1,float16,fp8,0,0.11065440177917481
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,8,128,1,fp8,fp8,0,0.1093135952949524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,32,128,1,float16,float16,0,0.08932480216026306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,32,128,1,float16,fp8,0,0.07527520060539246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,32,128,1,fp8,fp8,0,0.07511519789695739
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,1,128,1,float16,float16,0,0.05591679811477661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,1,128,1,float16,fp8,0,0.05550720095634461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,1,128,1,fp8,fp8,0,0.05565919876098633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,2,128,1,float16,float16,0,0.056771200895309445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,2,128,1,float16,fp8,0,0.05589280128479004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,2,128,1,fp8,fp8,0,0.055820798873901366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,1,128,1,float16,fp8,0,0.36852641105651857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,4,128,1,float16,fp8,0,0.057158398628234866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,4,128,1,fp8,fp8,0,0.056809598207473756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,8,128,1,float16,fp8,0,0.059087997674942015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,8,128,1,fp8,fp8,0,0.059350401163101196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,32,128,1,float16,fp8,0,0.040067198872566226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,32,128,1,fp8,fp8,0,0.039719998836517334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,1,128,1,float16,float16,0,0.027452799677848815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,32,8,128,1,float16,fp8,0,0.4002848148345947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,1,128,1,float16,fp8,0,0.028244799375534056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,1,128,1,fp8,fp8,0,0.02805120050907135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,2,128,1,float16,float16,0,0.027902400493621825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,2,128,1,float16,fp8,0,0.02829279899597168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,4,128,1,float16,float16,0,0.02964479923248291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,4,128,1,float16,fp8,0,0.028572800755500793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,4,128,1,fp8,fp8,0,0.028566399216651918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,8,128,1,float16,float16,0,0.035471999645233156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,8,128,1,float16,fp8,0,0.029284799098968507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,8,128,1,fp8,fp8,0,0.029203200340270997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,32,128,1,float16,float16,0,0.0281360000371933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,32,128,1,fp8,fp8,0,0.140502405166626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,32,128,1,float16,fp8,0,0.020028799772262573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,32,128,1,fp8,fp8,0,0.020155200362205507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,1,128,1,float16,float16,0,0.017260800302028655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,1,128,1,float16,fp8,0,0.01789119988679886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,2,128,1,float16,float16,0,0.017574399709701538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,2,128,1,float16,fp8,0,0.018016000092029572
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,2,128,1,fp8,fp8,0,0.01804800033569336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,4,128,1,float16,float16,0,0.01815200001001358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,4,128,1,float16,fp8,0,0.018192000687122345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,4,128,1,fp8,fp8,0,0.01812800019979477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,4,128,1,float16,float16,0,0.05858880281448364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,8,128,1,float16,fp8,0,0.018398399651050567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,8,128,1,fp8,fp8,0,0.018427200615406036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,32,128,1,float16,float16,0,0.01525759994983673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,32,128,1,float16,fp8,0,0.014139199256896972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,32,128,1,fp8,fp8,0,0.014131200313568116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,1,128,1,float16,float16,0,0.012744000554084778
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,1,128,1,float16,fp8,0,0.012828800082206725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,1,128,1,fp8,fp8,0,0.013064000010490417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,2,128,1,float16,float16,0,0.01249919980764389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,2,128,1,float16,fp8,0,0.013102400302886962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,2,128,1,fp8,fp8,0,0.013046400249004364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,4,128,1,float16,float16,0,0.012880000472068786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,4,128,1,float16,fp8,0,0.012948800623416901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,4,128,1,fp8,fp8,0,0.012950399518013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,8,128,1,float16,float16,0,0.01300159990787506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,8,128,1,float16,fp8,0,0.01292639970779419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,32,8,128,1,fp8,fp8,0,0.013081599771976472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,32,128,1,float16,float16,0,0.013728000223636627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,32,128,1,float16,fp8,0,0.013102400302886962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,32,128,1,fp8,fp8,0,0.013107199966907502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,1,128,1,float16,float16,0,0.012166400253772736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,1,128,1,float16,fp8,0,0.012647999823093415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,1,128,1,fp8,fp8,0,0.01239679977297783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,2,128,1,float16,float16,0,0.012011200189590454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,32,1,128,1,float16,fp8,0,0.10128480195999146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,2,128,1,fp8,fp8,0,0.012718400359153748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,1,128,1,fp8,fp8,0,0.017955200374126436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,4,128,1,float16,fp8,0,0.012564800679683685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,4,128,1,fp8,fp8,0,0.012350399792194367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,8,128,1,float16,float16,0,0.012164799869060517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,8,128,1,float16,fp8,0,0.012379200011491776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,8,128,1,fp8,fp8,0,0.012627199292182922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,32,128,1,float16,float16,0,0.01284160017967224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,32,8,128,1,float16,float16,0,0.018580800294876097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,32,128,1,float16,fp8,0,0.01249919980764389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,1,128,1,float16,float16,0,0.012057600170373916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,32,8,128,1,float16,float16,0,0.06257920265197754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,1,128,1,float16,fp8,0,0.012331199645996094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,1,128,1,fp8,fp8,0,0.012444800138473511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,2,128,1,float16,fp8,0,0.012403199821710587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,2,128,1,fp8,fp8,0,0.012336000055074691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,4,128,1,float16,float16,0,0.011990399658679962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,4,128,1,float16,fp8,0,0.012356799840927125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,4,128,1,fp8,fp8,0,0.012171199917793274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,8,128,1,float16,float16,0,0.012161599844694138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,8,128,1,float16,fp8,0,0.0123648002743721
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,8,128,1,fp8,fp8,0,0.012368000298738479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,1,128,1,float16,float16,0,0.18196640014648438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,1,128,1,float16,fp8,0,0.19012320041656494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,2,128,1,float16,fp8,0,0.012444800138473511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,32,4,128,1,float16,float16,0,0.012392000108957291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,2,128,1,float16,float16,0,0.18553279638290404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,2,128,1,float16,fp8,0,0.1886080026626587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,2,128,1,fp8,fp8,0,0.1885472059249878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,4,128,1,float16,float16,0,0.19288959503173828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,4,128,1,float16,fp8,0,0.19434560537338258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,4,128,1,fp8,fp8,0,0.19335520267486572
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,32,128,1,fp8,fp8,0,0.01271200031042099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,8,128,1,float16,float16,0,0.20781440734863282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,32,2,128,1,float16,float16,0,0.012064000219106674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,8,128,1,float16,fp8,0,0.20581600666046143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,32,128,1,float16,float16,0,0.1926751971244812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,1,128,1,float16,float16,0,0.0965712010860443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,8,128,1,fp8,fp8,0,0.20603361129760742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,1,128,1,float16,fp8,0,0.10187519788742065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,1,128,1,fp8,fp8,0,0.10185279846191406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,2,128,1,float16,fp8,0,0.10190240144729615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,2,128,1,float16,float16,0,0.09940479993820191
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,2,128,1,fp8,fp8,0,0.10200639963150024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,32,1,128,1,fp8,fp8,0,0.18990559577941896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,4,128,1,float16,float16,0,0.10366239547729492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,4,128,1,float16,fp8,0,0.10437599420547486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,4,128,1,fp8,fp8,0,0.10427199602127075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,8,128,1,float16,float16,0,0.1102463960647583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,8,128,1,float16,fp8,0,0.10964479446411132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,8,128,1,fp8,fp8,0,0.10941280126571655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,32,128,1,float16,float16,0,0.10224640369415283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,32,128,1,float16,fp8,0,0.08182560205459595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,1,128,1,float16,float16,0,0.055524802207946776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,32,128,1,fp8,fp8,0,0.08191999793052673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,1,128,1,float16,fp8,0,0.054955202341079715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,2,128,1,float16,float16,0,0.056550401449203494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,2,128,1,float16,fp8,0,0.056015998125076294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,2,128,1,fp8,fp8,0,0.05578240156173706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,4,128,1,float16,float16,0,0.05861279964447021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,4,128,1,fp8,fp8,0,0.05750399827957153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,8,128,1,float16,float16,0,0.0627295970916748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,8,128,1,float16,fp8,0,0.059680002927780154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,8,128,1,fp8,fp8,0,0.05939199924468994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,32,128,1,float16,float16,0,0.05645760297775269
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,32,128,1,float16,fp8,0,0.042577600479125975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,32,128,1,float16,float16,0,0.05024799704551697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,32,128,1,fp8,fp8,0,0.04251520037651062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,1,128,1,float16,float16,0,0.027529600262641906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,1,128,1,fp8,fp8,0,0.028255999088287354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,1,128,1,float16,fp8,0,0.02817600071430206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,2,128,1,float16,fp8,0,0.028431999683380126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,2,128,1,fp8,fp8,0,0.028142398595809935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,4,128,1,float16,float16,0,0.030169600248336793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,4,128,1,float16,fp8,0,0.02860639989376068
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,32,128,1,float16,fp8,0,0.15164159536361693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,32,2,128,1,fp8,fp8,0,0.028059199452400208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,8,128,1,float16,fp8,0,0.029300799965858458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,8,128,1,fp8,fp8,0,0.029124799370765685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,32,128,1,float16,float16,0,0.030060800909996032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,32,128,1,float16,fp8,0,0.020615999400615693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,32,128,1,fp8,fp8,0,0.020398400723934174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,1,128,1,float16,float16,0,0.0174127995967865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,1,128,1,float16,fp8,0,0.017793600261211396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,1,128,1,fp8,fp8,0,0.017894400656223296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,2,128,1,float16,float16,0,0.017529599368572235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,2,128,1,float16,fp8,0,0.018084800243377684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,2,128,1,fp8,fp8,0,0.018052799999713896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,4,128,1,float16,float16,0,0.017961600422859193
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,4,128,1,float16,fp8,0,0.018143999576568603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,4,128,1,fp8,fp8,0,0.01807360053062439
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,8,128,1,float16,float16,0,0.01851679980754852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,8,128,1,float16,fp8,0,0.018387199938297273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,32,8,128,1,fp8,fp8,0,0.01845120042562485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,32,128,1,float16,float16,0,0.015934400260448456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,1,128,1,fp8,fp8,0,0.05532640218734741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,32,128,1,float16,fp8,0,0.014158399403095245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,32,128,1,fp8,fp8,0,0.01414559930562973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,1,128,1,float16,fp8,0,0.013014400005340576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,32,4,128,1,float16,fp8,0,0.05780959725379944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,1,128,1,fp8,fp8,0,0.012956799566745758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,2,128,1,float16,float16,0,0.012824000418186187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,2,128,1,float16,fp8,0,0.012987199425697326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,2,128,1,fp8,fp8,0,0.013019199669361114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,4,128,1,float16,float16,0,0.012668800354003907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,4,128,1,float16,fp8,0,0.013038399815559387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,4,128,1,fp8,fp8,0,0.013009600341320038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,8,128,1,float16,float16,0,0.01324159950017929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,8,128,1,float16,fp8,0,0.01329759955406189
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,8,128,1,fp8,fp8,0,0.013006399571895599
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,32,128,1,float16,float16,0,0.011868800222873687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,2,128,1,float16,float16,0,0.027968001365661622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,32,128,1,float16,fp8,0,0.011068800091743469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,32,128,1,fp8,fp8,0,0.01088479980826378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,1,128,1,float16,float16,0,0.012280000001192093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,1,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,2,128,1,float16,float16,0,0.012217599898576736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,2,128,1,float16,fp8,0,0.012756800651550293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,2,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,4,128,1,fp8,fp8,0,0.0285504013299942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,32,8,128,1,float16,float16,0,0.03462719917297363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,4,128,1,float16,fp8,0,0.012606400251388549
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,8,128,1,float16,float16,0,0.012460800260305405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,8,128,1,float16,fp8,0,0.01260959953069687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,8,128,1,fp8,fp8,0,0.01260959953069687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,32,128,1,float16,float16,0,0.010921599715948105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,32,128,1,float16,fp8,0,0.010620799660682679
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,32,128,1,fp8,fp8,0,0.010838399827480315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,1,128,1,float16,float16,0,0.012068799883127212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,1,128,1,float16,fp8,0,0.01226079985499382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,1,128,1,fp8,fp8,0,0.012303999811410903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,2,128,1,float16,float16,0,0.01204800009727478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,2,128,1,float16,fp8,0,0.012544000148773193
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,2,128,1,fp8,fp8,0,0.012385600060224534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,4,128,1,float16,float16,0,0.012086399644613267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,4,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,4,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,8,128,1,float16,float16,0,0.011959999799728394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,8,128,1,float16,fp8,0,0.01239359974861145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,32,8,128,1,fp8,fp8,0,0.012246400117874146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,32,128,1,float16,float16,0,0.010849600285291671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,32,128,1,float16,fp8,0,0.01024480015039444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,32,128,1,fp8,fp8,0,0.010150399804115296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,1,128,1,float16,fp8,0,0.01180960014462471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,1,128,1,float16,float16,0,0.011939200013875962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,1,128,1,fp8,fp8,0,0.011913599818944931
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,2,128,1,float16,float16,0,0.011950399726629257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,2,128,1,float16,fp8,0,0.011952000111341477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,2,128,1,fp8,fp8,0,0.012355200201272964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,4,128,1,float16,float16,0,0.011873599886894227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,4,128,1,float16,fp8,0,0.012067200243473053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,8,128,1,float16,float16,0,0.01185920014977455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,8,128,1,float16,fp8,0,0.011881600320339202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,32,1,128,1,float16,float16,0,0.012884800136089326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,8,128,1,fp8,fp8,0,0.011894399672746659
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,1,128,1,float16,fp8,0,0.10295679569244384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,1,128,1,fp8,fp8,0,0.10300639867782593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,2,128,1,float16,float16,0,0.10031039714813232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,2,128,1,float16,fp8,0,0.10368800163269043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,1,128,1,float16,fp8,0,0.012724800407886505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,2,128,1,fp8,fp8,0,0.10345120429992676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,32,32,128,1,fp8,fp8,0,0.15089600086212157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,4,128,1,fp8,fp8,0,0.012681600451469422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,4,128,1,fp8,fp8,0,0.10353599786758423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,8,128,1,float16,float16,0,0.1106063961982727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,8,128,1,float16,fp8,0,0.10974240303039551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,8,128,1,fp8,fp8,0,0.10970239639282227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,32,128,1,float16,float16,0,0.13919999599456787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,32,128,1,float16,fp8,0,0.11883360147476196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,32,128,1,fp8,fp8,0,0.11801120042800903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,1,128,1,float16,float16,0,0.05594720244407654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,1,128,1,float16,fp8,0,0.05648159980773926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,1,128,1,fp8,fp8,0,0.0562720000743866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,2,128,1,float16,float16,0,0.057036799192428586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,2,128,1,fp8,fp8,0,0.056377601623535153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,4,128,1,float16,float16,0,0.05915039777755737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,4,128,1,float16,fp8,0,0.05783680081367493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,4,128,1,fp8,fp8,0,0.05800480246543884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,8,128,1,float16,float16,0,0.06268799901008607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,8,128,1,float16,fp8,0,0.05996800065040588
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,8,128,1,fp8,fp8,0,0.060545599460601805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,32,128,1,float16,float16,0,0.0758463978767395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,32,128,1,float16,fp8,0,0.06350399851799012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,32,128,1,fp8,fp8,0,0.06355360150337219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,1,128,1,float16,float16,0,0.027401599287986755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,1,128,1,float16,fp8,0,0.02858079969882965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,1,128,1,fp8,fp8,0,0.028537601232528687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,2,128,1,float16,float16,0,0.027716800570487976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,1,128,1,float16,float16,0,0.09784799814224243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,2,128,1,float16,fp8,0,0.02839359939098358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,4,128,1,float16,float16,0,0.028880000114440918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,4,128,1,float16,fp8,0,0.028593599796295166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,4,128,1,fp8,fp8,0,0.0285504013299942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,8,128,1,float16,float16,0,0.03516159951686859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,8,128,1,float16,fp8,0,0.029265600442886352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,8,128,1,fp8,fp8,0,0.029292801022529603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,32,128,1,float16,float16,0,0.04017280042171478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,4,128,1,float16,float16,0,0.10396000146865844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,32,128,1,float16,fp8,0,0.03116639852523804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,32,128,1,fp8,fp8,0,0.031193599104881287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,1,128,1,float16,fp8,0,0.018190400302410127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,1,128,1,fp8,fp8,0,0.018296000361442567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,2,128,1,float16,float16,0,0.017868800461292265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,2,128,1,float16,fp8,0,0.018294399976730345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,2,128,1,fp8,fp8,0,0.018350400030612946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,4,128,1,float16,float16,0,0.01809120029211044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,4,128,1,float16,fp8,0,0.018515199422836304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,32,2,128,1,float16,fp8,0,0.05730559825897217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,4,128,1,fp8,fp8,0,0.018379199504852294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,8,128,1,float16,float16,0,0.018535999953746794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,8,128,1,float16,fp8,0,0.018544000387191773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,8,128,1,fp8,fp8,0,0.018651199340820313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,32,128,1,float16,float16,0,0.0212351992726326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,32,128,1,float16,fp8,0,0.019334399700164796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,32,128,1,fp8,fp8,0,0.019300800561904908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,1,128,1,float16,float16,0,0.012919999659061432
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,1,128,1,float16,fp8,0,0.013097600638866424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,1,128,1,fp8,fp8,0,0.013091200590133667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,2,128,1,float16,float16,0,0.01295199990272522
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,32,4,128,1,fp8,fp8,0,0.012161599844694138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,2,128,1,float16,fp8,0,0.01297760009765625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,2,128,1,fp8,fp8,0,0.01311199963092804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,4,128,1,float16,fp8,0,0.013184000551700593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,4,128,1,fp8,fp8,0,0.013313600420951843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,8,128,1,float16,float16,0,0.013158400356769562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,8,128,1,float16,fp8,0,0.013286399841308593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,8,128,1,fp8,fp8,0,0.013198399543762207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,32,128,1,float16,float16,0,0.014587199687957764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,32,128,1,float16,fp8,0,0.013446399569511413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,32,128,1,fp8,fp8,0,0.013527999818325042
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,1,128,1,float16,float16,0,0.012225600332021714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,1,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,1,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,32,2,128,1,fp8,fp8,0,0.028591999411582948
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,2,128,1,float16,fp8,0,0.012747199833393097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,2,128,1,fp8,fp8,0,0.012350399792194367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,4,128,1,float16,float16,0,0.012363199889659882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,4,128,1,float16,fp8,0,0.012668800354003907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,4,128,1,fp8,fp8,0,0.012398400157690049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,8,128,1,float16,float16,0,0.012438400089740754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,8,128,1,float16,fp8,0,0.012566399574279786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,8,128,1,fp8,fp8,0,0.012689599394798278
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,32,128,1,float16,float16,0,0.0110944002866745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,32,128,1,float16,fp8,0,0.010824000090360641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,32,128,1,fp8,fp8,0,0.010654400289058685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,1,128,1,float16,float16,0,0.012086399644613267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,1,128,1,float16,fp8,0,0.012361600250005721
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,1,128,1,fp8,fp8,0,0.01242239996790886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,32,4,128,1,float16,float16,0,0.012169600278139115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,2,128,1,float16,float16,0,0.012091200053691863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,2,128,1,float16,fp8,0,0.012356799840927125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,4,128,1,float16,float16,0,0.011975999921560287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,32,1,128,1,float16,float16,0,0.017884799838066102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,8,128,1,float16,float16,0,0.011937599629163742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,8,128,1,fp8,fp8,0,0.012392000108957291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,32,128,1,float16,float16,0,0.010945600271224976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,8,128,1,float16,fp8,0,0.012398400157690049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,32,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,1,128,1,float16,float16,0,0.011683200299739838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,1,128,1,float16,fp8,0,0.012121599912643433
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,1,128,1,fp8,fp8,0,0.011982399970293045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,2,128,1,float16,float16,0,0.01186240017414093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,2,128,1,float16,fp8,0,0.011932799965143204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,2,128,1,fp8,fp8,0,0.011806400120258331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,4,128,1,float16,float16,0,0.011604800075292587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,4,128,1,float16,fp8,0,0.012107200175523757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,4,128,1,fp8,fp8,0,0.011952000111341477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,8,128,1,float16,float16,0,0.011873599886894227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,8,128,1,float16,fp8,0,0.01210559979081154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,8,128,1,fp8,fp8,0,0.012257599830627441
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,32,128,1,float16,float16,0,0.010710400342941285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,32,128,1,float16,fp8,0,0.010123199969530105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,32,128,1,fp8,fp8,0,0.010091199725866317
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,1,128,1,float16,float16,0,0.011420799791812897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,1,128,1,float16,fp8,0,0.011723200231790543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,1,128,1,fp8,fp8,0,0.011726400256156922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,2,128,1,float16,float16,0,0.011472000181674958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,2,128,1,float16,fp8,0,0.01168000027537346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,2,128,1,fp8,fp8,0,0.011856000125408172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,4,128,1,float16,float16,0,0.011606399714946748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,4,128,1,float16,fp8,0,0.011838400363922119
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,4,128,1,fp8,fp8,0,0.011740799993276596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,8,128,1,float16,float16,0,0.011515200138092041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,8,128,1,float16,fp8,0,0.01183359995484352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,32,8,128,1,fp8,fp8,0,0.011881600320339202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,32,4,128,1,float16,fp8,0,0.10550240278244019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,2,128,1,fp8,fp8,0,0.012408000230789185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,1,128,1,float16,float16,0,0.05746080279350281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,4,128,1,float16,fp8,0,0.012412799894809723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,32,4,128,1,fp8,fp8,0,0.012337599694728852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,1,128,1,fp8,fp8,0,0.05726879835128784
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,2,128,1,float16,float16,0,0.058273601531982425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,2,128,1,float16,fp8,0,0.05819360017776489
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,32,32,128,1,float16,fp8,0,0.010132800042629241
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,4,128,1,float16,fp8,0,0.05921919941902161
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,4,128,1,fp8,fp8,0,0.05880799889564514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,8,128,1,float16,fp8,0,0.06835039854049682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,8,128,1,fp8,fp8,0,0.06801760196685791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,32,128,1,float16,float16,0,0.11822880506515503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,32,128,1,float16,fp8,0,0.10452159643173217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,32,128,1,fp8,fp8,0,0.104694402217865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,32,4,128,1,float16,float16,0,0.013031999766826629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,1,128,1,float16,float16,0,0.028115200996398925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,1,128,1,float16,fp8,0,0.029976001381874083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,1,128,1,fp8,fp8,0,0.0295199990272522
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,2,128,1,float16,float16,0,0.0288239985704422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,2,128,1,float16,fp8,0,0.029676800966262816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,2,128,1,fp8,fp8,0,0.02948000133037567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,4,128,1,float16,float16,0,0.030462399125099182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,32,2,128,1,float16,float16,0,0.012251199781894683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,4,128,1,float16,fp8,0,0.029583999514579774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,4,128,1,fp8,fp8,0,0.029688000679016113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,8,128,1,float16,fp8,0,0.034241598844528195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,8,128,1,fp8,fp8,0,0.034027200937271115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,32,128,1,float16,float16,0,0.06292480230331421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,32,128,1,float16,fp8,0,0.051841598749160764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,32,128,1,fp8,fp8,0,0.05172799825668335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,1,128,1,float16,fp8,0,0.018513600528240203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,1,128,1,fp8,fp8,0,0.018695999681949616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,2,128,1,float16,float16,0,0.017990399897098542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,2,128,1,float16,fp8,0,0.018721599876880646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,2,128,1,fp8,fp8,0,0.018596799671649934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,1,128,1,float16,fp8,0,0.057631999254226685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,4,128,1,float16,float16,0,0.018084800243377684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,2,128,1,fp8,fp8,0,0.05812159776687622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,4,128,1,fp8,fp8,0,0.018742400407791137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,4,128,1,float16,float16,0,0.06059520244598389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,8,128,1,float16,float16,0,0.021172800660133363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,8,128,1,fp8,fp8,0,0.02069920003414154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,32,128,1,float16,float16,0,0.03210720121860504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,32,128,1,float16,fp8,0,0.0298224002122879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,1,128,1,float16,float16,0,0.012940800189971924
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,1,128,1,float16,fp8,0,0.013462400436401368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,1,128,1,fp8,fp8,0,0.01350879967212677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,2,128,1,float16,float16,0,0.013102400302886962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,2,128,1,float16,fp8,0,0.013471999764442444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,2,128,1,fp8,fp8,0,0.013364799320697784
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,4,128,1,float16,float16,0,0.01313599944114685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,4,128,1,float16,fp8,0,0.013387200236320496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,4,128,1,fp8,fp8,0,0.013457599282264709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,8,128,1,float16,float16,0,0.014441600441932679
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,8,128,1,float16,fp8,0,0.013700799643993377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,8,128,1,fp8,fp8,0,0.013724799454212188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,32,128,1,float16,float16,0,0.01968960016965866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,32,128,1,float16,fp8,0,0.018721599876880646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,32,128,1,fp8,fp8,0,0.018692800402641298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,1,128,1,float16,float16,0,0.012494400143623352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,32,8,128,1,float16,float16,0,0.041228801012039185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,1,128,1,float16,fp8,0,0.01239679977297783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,1,128,1,fp8,fp8,0,0.012894399464130402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,2,128,1,float16,fp8,0,0.012600000202655792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,1,128,1,float16,float16,0,0.01780479997396469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,2,128,1,fp8,fp8,0,0.012905600666999816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,4,128,1,float16,fp8,0,0.012627199292182922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,8,128,1,float16,float16,0,0.010939200222492219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,8,128,1,float16,fp8,0,0.01069599986076355
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,8,128,1,fp8,fp8,0,0.010711999982595444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,32,128,1,float16,float16,0,0.013686400651931763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,32,128,1,float16,fp8,0,0.012945599853992462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,32,128,1,fp8,fp8,0,0.013128000497817992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,8,128,1,float16,fp8,0,0.020366400480270386
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,1,128,1,float16,float16,0,0.012219200283288956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,1,128,1,float16,fp8,0,0.012145599722862244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,2,128,1,float16,float16,0,0.012191999703645706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,32,32,128,1,fp8,fp8,0,0.030051198601722718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,2,128,1,float16,fp8,0,0.012452799826860428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,4,128,1,float16,float16,0,0.012140800058841706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,4,128,1,float16,fp8,0,0.012452799826860428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,4,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,8,128,1,float16,float16,0,0.010622400045394897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,8,128,1,float16,fp8,0,0.010328000038862228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,8,128,1,fp8,fp8,0,0.010318399965763092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,32,128,1,float16,float16,0,0.011076799780130386
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,32,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,32,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,1,128,1,float16,float16,0,0.011737599968910217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,1,128,1,float16,fp8,0,0.01199520006775856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,1,128,1,fp8,fp8,0,0.01207519993185997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,2,128,1,float16,float16,0,0.011718399822711945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,2,128,1,float16,fp8,0,0.011852800101041793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,2,128,1,fp8,fp8,0,0.01186719983816147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,4,128,1,float16,float16,0,0.011902400106191636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,4,128,1,float16,fp8,0,0.01204639971256256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,2,128,1,float16,float16,0,0.012171199917793274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,4,128,1,fp8,fp8,0,0.012060800194740295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,8,128,1,float16,fp8,0,0.010075200349092484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,4,128,1,fp8,fp8,0,0.01252640038728714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,8,128,1,fp8,fp8,0,0.009892799705266953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,32,128,1,float16,float16,0,0.010796800255775452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,32,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,32,128,1,fp8,fp8,0,0.010183999687433243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,1,128,1,float16,float16,0,0.011454399675130844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,1,128,1,float16,fp8,0,0.011953599750995636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,32,4,128,1,float16,fp8,0,0.01884160041809082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,1,128,1,fp8,fp8,0,0.012001600116491318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,2,128,1,float16,float16,0,0.011587200313806533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,1,128,1,fp8,fp8,0,0.012438400089740754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,4,128,1,float16,float16,0,0.011777599900960922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,4,128,1,float16,fp8,0,0.012120000272989272
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,32,2,128,1,fp8,fp8,0,0.01260959953069687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,4,128,1,fp8,fp8,0,0.012014400213956833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,8,128,1,float16,fp8,0,0.009622400254011154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,8,128,1,fp8,fp8,0,0.009590400010347366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,32,128,1,float16,float16,0,0.010558400303125381
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,32,128,1,float16,fp8,0,0.010047999769449234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,32,128,1,fp8,fp8,0,0.010177599638700486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,1,128,1,float16,float16,0,0.01162080019712448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,1,128,1,float16,fp8,0,0.011896000057458878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,1,128,1,fp8,fp8,0,0.011908800154924393
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,2,128,1,float16,float16,0,0.011604800075292587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,2,128,1,float16,fp8,0,0.011896000057458878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,2,128,1,fp8,fp8,0,0.011687999963760376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,4,128,1,float16,float16,0,0.011462400108575821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,4,128,1,float16,fp8,0,0.011851199716329575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,4,128,1,fp8,fp8,0,0.011872000247240066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,8,128,1,float16,float16,0,0.010080000013113022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,8,128,1,float16,fp8,0,0.009747199714183807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,32,8,128,1,fp8,fp8,0,0.009692800045013428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,32,4,128,1,float16,float16,0,0.01228640004992485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,1,128,1,float16,float16,0,0.01815200001001358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,1,128,1,float16,fp8,0,0.01783519983291626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,1,128,1,fp8,fp8,0,0.018104000389575957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,2,128,1,float16,float16,0,0.02350880056619644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,2,128,1,float16,fp8,0,0.023345600068569183
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,2,128,1,fp8,fp8,0,0.023423999547958374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,4,128,1,float16,float16,0,0.03487200140953064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,4,128,1,float16,fp8,0,0.034483200311660765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,4,128,1,fp8,fp8,0,0.03462879955768585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,8,128,1,float16,float16,0,0.05595679879188538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,8,128,1,float16,fp8,0,0.05627040266990661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,32,8,128,1,fp8,fp8,0,0.0556768000125885
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,32,128,1,float16,float16,0,0.09060959815979004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,2,128,1,float16,fp8,0,0.01196959987282753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,32,8,128,1,float16,float16,0,0.07517759799957276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,1,128,1,float16,float16,0,0.013126400113105775
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,1,128,1,fp8,fp8,0,0.013084800541400909
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,2,128,1,float16,float16,0,0.01584160029888153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,2,128,1,float16,fp8,0,0.015398399531841278
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,8,128,1,float16,float16,0,0.009960000216960908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,2,128,1,fp8,fp8,0,0.015479999780654907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,4,128,1,float16,float16,0,0.021107199788093566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,4,128,1,float16,fp8,0,0.02094399929046631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,4,128,1,fp8,fp8,0,0.0210207998752594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,8,128,1,float16,float16,0,0.032416000962257385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,8,128,1,float16,fp8,0,0.031753599643707275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,8,128,1,fp8,fp8,0,0.031964799761772154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,32,128,1,float16,float16,0,0.04982880055904389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,32,128,1,float16,fp8,0,0.04968000054359436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,1,128,1,float16,float16,0,0.011190400272607804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,1,128,1,float16,fp8,0,0.01133280023932457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,1,128,1,fp8,fp8,0,0.011486399918794632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,2,128,1,float16,float16,0,0.011584000289440155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,2,128,1,float16,fp8,0,0.011416000127792359
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,2,128,1,fp8,fp8,0,0.011524800211191177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,4,128,1,float16,float16,0,0.014091199636459351
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,4,128,1,float16,fp8,0,0.013937599956989288
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,4,128,1,fp8,fp8,0,0.01406240016222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,8,128,1,float16,float16,0,0.019857600331306458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,8,128,1,float16,fp8,0,0.01972319930791855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,32,8,128,1,float16,float16,0,0.010283199697732925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,8,128,1,fp8,fp8,0,0.0196943998336792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,32,128,1,float16,fp8,0,0.028748801350593566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,32,128,1,fp8,fp8,0,0.028763198852539064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,1,128,1,float16,float16,0,0.010788799822330475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,1,128,1,fp8,fp8,0,0.010753600299358368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,2,128,1,float16,float16,0,0.01072480008006096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,2,128,1,float16,fp8,0,0.010708799958229065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,2,128,1,fp8,fp8,0,0.010675200074911118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,4,128,1,float16,float16,0,0.010835199803113937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,4,128,1,float16,fp8,0,0.010817600041627884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,32,2,128,1,fp8,fp8,0,0.011881600320339202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,4,128,1,fp8,fp8,0,0.010796800255775452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,1,128,1,float16,fp8,0,0.012359999865293504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,8,128,1,fp8,fp8,0,0.013176000118255616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,32,128,1,float16,float16,0,0.018648000061511995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,32,128,1,float16,fp8,0,0.017950400710105896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,32,128,1,fp8,fp8,0,0.01791999936103821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,1,128,1,float16,float16,0,0.010072000324726105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,1,128,1,float16,fp8,0,0.009984000027179718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,1,128,1,fp8,fp8,0,0.009944000095129014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,32,32,128,1,fp8,fp8,0,0.04905439913272858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,2,128,1,float16,float16,0,0.010214400291442872
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,2,128,1,float16,fp8,0,0.010100799798965453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,2,128,1,fp8,fp8,0,0.010152000188827514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,4,128,1,float16,float16,0,0.010127999633550645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,4,128,1,float16,fp8,0,0.010262399911880493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,4,128,1,fp8,fp8,0,0.010113599896430969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,8,128,1,float16,float16,0,0.010211200267076493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,8,128,1,float16,fp8,0,0.010164800286293029
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,32,8,128,1,fp8,fp8,0,0.010179200023412705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,32,128,1,float16,float16,0,0.013329599797725678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,32,128,1,float16,fp8,0,0.012590399384498597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,32,128,1,fp8,fp8,0,0.012721599638462066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,1,128,1,float16,float16,0,0.009998399764299393
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,1,128,1,float16,fp8,0,0.009827200323343277
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,32,128,1,float16,float16,0,0.029841598868370057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,1,128,1,fp8,fp8,0,0.009700799733400345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,2,128,1,float16,float16,0,0.00963200032711029
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,2,128,1,float16,fp8,0,0.00995360016822815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,2,128,1,fp8,fp8,0,0.0095551997423172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,4,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,4,128,1,fp8,fp8,0,0.010311999917030334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,8,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,8,128,1,float16,fp8,0,0.010183999687433243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,8,128,1,fp8,fp8,0,0.010203199833631516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,32,128,1,float16,float16,0,0.010630399733781815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,32,128,1,fp8,fp8,0,0.08915519714355469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,32,128,1,float16,fp8,0,0.010198400169610978
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,32,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,8,128,1,float16,float16,0,0.013129599392414093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,1,128,1,float16,fp8,0,0.009344000369310379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,1,128,1,fp8,fp8,0,0.009404800087213516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,2,128,1,float16,float16,0,0.009721600264310837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,2,128,1,float16,fp8,0,0.009513600170612336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,2,128,1,fp8,fp8,0,0.009596800059080124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,4,128,1,float16,float16,0,0.00973920002579689
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,4,128,1,float16,fp8,0,0.00957920029759407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,4,128,1,fp8,fp8,0,0.009556800127029419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,8,128,1,float16,fp8,0,0.009772799909114838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,8,128,1,fp8,fp8,0,0.009702400118112565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,32,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,32,128,1,float16,fp8,0,0.010078399628400802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,32,128,1,fp8,fp8,0,0.01011039987206459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,1,128,1,float16,float16,0,0.0097680002450943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,1,128,1,float16,fp8,0,0.009521599858999252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,1,128,1,fp8,fp8,0,0.00942239984869957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,2,128,1,float16,float16,0,0.009542399644851684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,2,128,1,float16,fp8,0,0.009353599697351455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,2,128,1,fp8,fp8,0,0.00939520001411438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,4,128,1,float16,float16,0,0.009628800302743911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,4,128,1,float16,fp8,0,0.009700799733400345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,4,128,1,fp8,fp8,0,0.009759999811649323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,8,128,1,float16,float16,0,0.00984480008482933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,8,128,1,float16,fp8,0,0.009699200093746186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,32,8,128,1,fp8,fp8,0,0.009651199728250504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,32,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,32,128,1,float16,fp8,0,0.009920000284910201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,32,4,128,1,float16,fp8,0,0.009944000095129014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,32,128,1,fp8,fp8,0,0.009924799948930741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,1,128,1,float16,float16,0,0.009431999921798707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,1,128,1,float16,fp8,0,0.009363199770450591
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,1,128,1,fp8,fp8,0,0.009356799721717834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,2,128,1,float16,float16,0,0.00957759991288185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,2,128,1,float16,fp8,0,0.009480000287294389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,2,128,1,fp8,fp8,0,0.009384000301361084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,4,128,1,float16,float16,0,0.009567999839782714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,4,128,1,float16,fp8,0,0.009406399726867676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,32,32,128,1,float16,fp8,0,0.089683198928833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,8,128,1,float16,float16,0,0.009679999947547913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,8,128,1,fp8,fp8,0,0.009561599791049957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,8,128,1,float16,fp8,0,0.013247999548912048
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,8,128,1,float16,float16,0,0.009803199768066406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,32,1,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,4,128,1,fp8,fp8,0,0.009441599994897843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,32,1,128,1,float16,float16,0,0.00952799990773201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,32,8,128,1,float16,fp8,0,0.009779199957847595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,1,128,1,float16,fp8,0,18.039195251464843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,1,128,1,fp8,fp8,0,17.524098205566407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,2,128,1,float16,fp8,0,17.824220275878908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,4,128,1,float16,fp8,0,17.75610809326172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,1,128,1,float16,float16,0,22.788841247558594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,2,128,1,float16,float16,0,22.619171142578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,2,128,1,fp8,fp8,0,18.134446716308595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,4,128,1,float16,float16,0,22.974037170410156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,24,128,1,float16,float16,0,12.065340423583985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,4,128,1,fp8,fp8,0,18.62108612060547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,1,128,1,float16,float16,0,11.462758636474609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,8,128,1,float16,fp8,0,18.601560974121092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,8,128,1,fp8,fp8,0,18.561468505859374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,24,8,128,1,float16,float16,0,23.91901397705078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,1,128,1,float16,fp8,0,9.283850860595702
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,1,128,1,fp8,fp8,0,9.239144134521485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,2,128,1,float16,fp8,0,9.019306945800782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,2,128,1,fp8,fp8,0,9.023088073730468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,2,128,1,float16,float16,0,11.613497924804687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,4,128,1,float16,float16,0,11.272057342529298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,4,128,1,float16,fp8,0,9.05337905883789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,4,128,1,fp8,fp8,0,9.165379333496094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,8,128,1,float16,fp8,0,8.962328338623047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,24,128,1,float16,float16,0,5.758763122558594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,8,128,1,fp8,fp8,0,9.102043151855469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,8,128,1,float16,float16,0,11.76998062133789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,1,128,1,float16,fp8,0,4.442849731445312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,1,128,1,fp8,fp8,0,4.43591194152832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,1,128,1,float16,float16,0,5.6145790100097654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,2,128,1,float16,float16,0,5.5966846466064455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,2,128,1,float16,fp8,0,4.373681640625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,2,128,1,fp8,fp8,0,4.46550407409668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,4,128,1,float16,float16,0,5.597649765014649
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,4,128,1,float16,fp8,0,4.4590095520019535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,4,128,1,fp8,fp8,0,4.483172988891601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,8,128,1,float16,float16,0,6.0263103485107425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,8,128,1,float16,fp8,0,4.579627227783203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,24,128,1,float16,float16,0,2.7605695724487305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,8,128,1,fp8,fp8,0,4.630737686157227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,1,128,1,float16,fp8,0,2.222327995300293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,1,128,1,float16,float16,0,2.718891143798828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,1,128,1,fp8,fp8,0,2.238630485534668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,2,128,1,float16,float16,0,2.8241472244262695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,2,128,1,float16,fp8,0,2.3003440856933595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,2,128,1,fp8,fp8,0,2.328152084350586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,4,128,1,float16,float16,0,2.858310317993164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,4,128,1,float16,fp8,0,2.2262815475463866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,4,128,1,fp8,fp8,0,2.472697639465332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,8,128,1,float16,float16,0,2.721939277648926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,8,128,1,float16,fp8,0,2.2471439361572267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,8,128,1,fp8,fp8,0,2.3694143295288086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,1,128,1,float16,fp8,0,10.081436920166016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,1,128,1,float16,float16,0,13.10096435546875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,1,128,1,fp8,fp8,0,10.322321319580078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,2,128,1,float16,float16,0,13.348410034179688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,2,128,1,float16,fp8,0,10.335771179199218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,2,128,1,fp8,fp8,0,10.415606689453124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,4,128,1,float16,float16,0,13.236721801757813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,4,128,1,float16,fp8,0,10.36684799194336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,4,128,1,fp8,fp8,0,10.210902404785156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,8,128,1,float16,float16,0,13.58148193359375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,8,128,1,float16,fp8,0,10.380633544921874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,24,8,128,1,fp8,fp8,0,10.556501007080078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,24,128,1,float16,float16,0,7.005340576171875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,24,128,1,float16,fp8,0,2.216035270690918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,24,24,128,1,fp8,fp8,0,2.209756851196289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,24,128,1,float16,fp8,0,4.607110214233399
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,24,24,128,1,fp8,fp8,0,4.556721496582031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,24,128,1,fp8,fp8,0,5.224332809448242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,1,128,1,float16,fp8,0,4.954596710205078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,24,128,1,float16,fp8,0,9.161457824707032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,1,128,1,fp8,fp8,0,5.077371215820312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,24,128,1,float16,fp8,0,5.257886505126953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,24,24,128,1,fp8,fp8,0,9.25221939086914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,1,128,1,float16,float16,0,6.2459358215332035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,2,128,1,float16,float16,0,6.35306396484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,2,128,1,float16,fp8,0,5.042534255981446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,2,128,1,fp8,fp8,0,5.014080047607422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,4,128,1,float16,float16,0,6.297057723999023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,4,128,1,float16,fp8,0,5.14710578918457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,4,128,1,fp8,fp8,0,5.1103168487548825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,24,128,1,float16,fp8,0,2.599929618835449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,24,128,1,float16,float16,0,3.134219169616699
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,8,128,1,float16,fp8,0,5.0477855682373045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,24,128,1,fp8,fp8,0,2.755721664428711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,8,128,1,float16,float16,0,6.608233642578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,24,8,128,1,fp8,fp8,0,5.3201744079589846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,1,128,1,float16,float16,0,3.1602767944335937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,1,128,1,float16,fp8,0,2.524633598327637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,1,128,1,fp8,fp8,0,2.492856025695801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,2,128,1,float16,float16,0,3.182472038269043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,2,128,1,float16,fp8,0,2.50009765625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,2,128,1,fp8,fp8,0,2.518499183654785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,4,128,1,float16,fp8,0,2.552948760986328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,4,128,1,fp8,fp8,0,2.714740753173828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,4,128,1,float16,float16,0,3.180820846557617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,8,128,1,float16,fp8,0,2.516606330871582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,24,128,1,float16,fp8,0,1.3188223838806152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,24,128,1,float16,float16,0,1.5846240043640136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,24,128,1,fp8,fp8,0,1.342244815826416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,8,128,1,fp8,fp8,0,2.6422895431518554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,1,128,1,fp8,fp8,0,1.247396755218506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,1,128,1,float16,float16,0,1.4764431953430175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,2,128,1,float16,fp8,0,1.3255135536193847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,2,128,1,float16,float16,0,1.4398736000061034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,2,128,1,fp8,fp8,0,1.27915678024292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,4,128,1,float16,fp8,0,1.2581040382385253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,4,128,1,float16,float16,0,1.4797648429870605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,4,128,1,fp8,fp8,0,1.3864447593688964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,8,128,1,float16,fp8,0,1.3905327796936036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,8,128,1,float16,float16,0,1.4637167930603028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,8,128,1,fp8,fp8,0,1.2979776382446289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,24,8,128,1,float16,float16,0,3.1637344360351562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,24,1,128,1,float16,fp8,0,1.4303600311279296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,1,128,1,float16,fp8,0,7.115723419189453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,1,128,1,fp8,fp8,0,7.136918640136718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,2,128,1,float16,fp8,0,7.104895782470703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,2,128,1,fp8,fp8,0,7.247550201416016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,1,128,1,float16,float16,0,8.858713531494141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,2,128,1,float16,float16,0,9.08052978515625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,4,128,1,float16,fp8,0,7.188600158691406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,4,128,1,float16,float16,0,9.117453002929688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,24,128,1,fp8,fp8,0,3.753339385986328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,24,128,1,float16,fp8,0,4.146201705932617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,24,128,1,float16,float16,0,5.24083366394043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,4,128,1,fp8,fp8,0,7.446814727783203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,1,128,1,float16,float16,0,4.469504165649414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,8,128,1,float16,fp8,0,7.2050529479980465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,8,128,1,fp8,fp8,0,7.877291107177735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,24,8,128,1,float16,float16,0,10.121025848388673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,1,128,1,float16,fp8,0,3.5652481079101563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,1,128,1,fp8,fp8,0,3.56212158203125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,2,128,1,float16,fp8,0,3.6225231170654295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,2,128,1,float16,float16,0,4.521976089477539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,2,128,1,fp8,fp8,0,3.859415817260742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,4,128,1,float16,fp8,0,3.617055892944336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,4,128,1,float16,float16,0,4.433606338500977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,4,128,1,fp8,fp8,0,3.589064025878906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,24,128,1,float16,float16,0,2.2770687103271485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,24,128,1,float16,fp8,0,1.886854362487793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,24,128,1,fp8,fp8,0,1.9712879180908203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,8,128,1,float16,float16,0,4.5319568634033205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,8,128,1,float16,fp8,0,3.6152225494384767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,24,8,128,1,fp8,fp8,0,3.6575183868408203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,1,128,1,float16,float16,0,2.087348747253418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,1,128,1,float16,fp8,0,1.8845775604248047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,1,128,1,fp8,fp8,0,1.746873664855957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,2,128,1,fp8,fp8,0,1.770631980895996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,2,128,1,float16,fp8,0,1.899617576599121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,2,128,1,float16,float16,0,2.1004976272583007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,4,128,1,float16,float16,0,2.1257152557373047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,4,128,1,float16,fp8,0,1.9635520935058595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,24,128,1,float16,float16,0,1.281276798248291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,24,128,1,float16,fp8,0,1.0489263534545898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,8,128,1,float16,fp8,0,1.7855968475341797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,8,128,1,fp8,fp8,0,1.8039167404174805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,8,128,1,float16,float16,0,2.2184560775756834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,24,128,1,fp8,fp8,0,0.9648320198059082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,1,128,1,float16,fp8,0,0.9259856224060059
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,1,128,1,float16,float16,0,1.1255231857299806
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,1,128,1,fp8,fp8,0,0.8847743988037109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,2,128,1,fp8,fp8,0,0.8897071838378906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,2,128,1,float16,float16,0,1.0084192276000976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,2,128,1,float16,fp8,0,1.0473072052001953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,4,128,1,float16,fp8,0,0.8932255744934082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,4,128,1,float16,float16,0,1.0174943923950195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,4,128,1,fp8,fp8,0,1.0179327964782714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,8,128,1,float16,float16,0,1.0376496315002441
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,8,128,1,float16,fp8,0,0.8861632347106934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,24,8,128,1,fp8,fp8,0,0.9788592338562012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,24,4,128,1,fp8,fp8,0,1.810126495361328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,1,128,1,float16,fp8,0,9.12805633544922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,1,128,1,fp8,fp8,0,9.313337707519532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,2,128,1,float16,fp8,0,9.233831787109375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,2,128,1,fp8,fp8,0,9.523804473876954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,4,128,1,float16,fp8,0,9.379206085205078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,1,128,1,float16,float16,0,12.017025756835938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,2,128,1,float16,float16,0,11.90764923095703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,4,128,1,float16,float16,0,11.811803436279297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,24,128,1,float16,fp8,0,4.965460968017578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,24,128,1,fp8,fp8,0,4.862347030639649
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,24,128,1,float16,float16,0,6.283452987670898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,1,128,1,float16,float16,0,5.8737537384033205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,4,128,1,fp8,fp8,0,9.172672271728516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,8,128,1,float16,fp8,0,9.419329833984374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,8,128,1,fp8,fp8,0,9.63263397216797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,1,128,1,float16,fp8,0,4.755831909179688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,24,8,128,1,float16,float16,0,12.103628540039063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,1,128,1,fp8,fp8,0,4.556358337402344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,2,128,1,float16,float16,0,5.847663879394531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,2,128,1,float16,fp8,0,4.735612869262695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,2,128,1,fp8,fp8,0,4.574694442749023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,4,128,1,float16,fp8,0,4.668204879760742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,4,128,1,float16,float16,0,6.11176643371582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,24,128,1,float16,fp8,0,2.4396127700805663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,24,128,1,float16,float16,0,2.791489601135254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,4,128,1,fp8,fp8,0,4.798791885375977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,8,128,1,float16,fp8,0,4.683846282958984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,8,128,1,float16,float16,0,6.02062873840332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,24,128,1,fp8,fp8,0,2.4418672561645507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,24,8,128,1,fp8,fp8,0,4.782043075561523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,1,128,1,float16,fp8,0,2.3251520156860352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,1,128,1,float16,float16,0,2.940153694152832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,1,128,1,fp8,fp8,0,2.4478368759155273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,2,128,1,float16,fp8,0,2.304582405090332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,2,128,1,float16,float16,0,2.957548713684082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,2,128,1,fp8,fp8,0,2.404052734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,4,128,1,float16,fp8,0,2.330044746398926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,4,128,1,float16,float16,0,2.850129508972168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,4,128,1,fp8,fp8,0,2.3114383697509764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,24,128,1,float16,float16,0,1.593404769897461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,8,128,1,float16,fp8,0,2.3609792709350588
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,8,128,1,fp8,fp8,0,2.323431968688965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,24,128,1,float16,fp8,0,1.259055995941162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,24,128,1,fp8,fp8,0,1.2270591735839844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,24,8,128,1,float16,float16,0,2.9688207626342775
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,1,128,1,float16,fp8,0,1.1482671737670898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,1,128,1,float16,float16,0,1.5072928428649903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,1,128,1,fp8,fp8,0,1.2541695594787599
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,2,128,1,float16,fp8,0,1.1417152404785156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,2,128,1,float16,float16,0,1.371555233001709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,2,128,1,fp8,fp8,0,1.2534912109375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,4,128,1,float16,float16,0,1.3874560356140138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,4,128,1,float16,fp8,0,1.3286704063415526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,4,128,1,fp8,fp8,0,1.1525584220886231
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,24,128,1,float16,fp8,0,0.6569024085998535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,8,128,1,float16,fp8,0,1.2011903762817382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,24,128,1,float16,float16,0,0.8145263671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,8,128,1,fp8,fp8,0,1.1542672157287597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,24,128,1,fp8,fp8,0,0.7380032062530517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,1,128,1,float16,float16,0,0.7164063930511475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,1,128,1,fp8,fp8,0,0.6127999782562256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,2,128,1,float16,float16,0,0.6701039791107177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,2,128,1,float16,fp8,0,0.6606592178344727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,2,128,1,fp8,fp8,0,0.5717887878417969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,4,128,1,float16,float16,0,0.6840576171875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,4,128,1,float16,fp8,0,0.6135568141937255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,4,128,1,fp8,fp8,0,0.6166560173034668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,8,128,1,float16,float16,0,0.698963212966919
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,8,128,1,float16,fp8,0,0.5822015762329101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,8,128,1,fp8,fp8,0,0.6730559825897217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,24,8,128,1,float16,float16,0,1.4887248039245606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,24,1,128,1,float16,fp8,0,0.5714159965515136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,1,128,1,float16,fp8,0,5.3604896545410154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,1,128,1,fp8,fp8,0,5.308800125122071
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,2,128,1,float16,fp8,0,5.326347351074219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,2,128,1,float16,float16,0,6.7618049621582035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,1,128,1,float16,float16,0,7.079411315917969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,2,128,1,fp8,fp8,0,5.318588638305664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,4,128,1,float16,float16,0,6.545051574707031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,24,128,1,float16,fp8,0,2.7722959518432617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,4,128,1,float16,fp8,0,5.493342590332031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,4,128,1,fp8,fp8,0,5.355204772949219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,24,128,1,float16,float16,0,3.606342315673828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,24,128,1,fp8,fp8,0,2.8692272186279295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,8,128,1,fp8,fp8,0,5.5414783477783205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,8,128,1,float16,fp8,0,5.4844623565673825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,24,8,128,1,float16,float16,0,6.945403289794922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,1,128,1,float16,fp8,0,2.681839942932129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,1,128,1,fp8,fp8,0,2.6520687103271485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,1,128,1,float16,float16,0,3.256635284423828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,2,128,1,float16,float16,0,3.357593536376953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,2,128,1,float16,fp8,0,2.6457216262817385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,2,128,1,fp8,fp8,0,2.6664127349853515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,4,128,1,float16,fp8,0,2.842540740966797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,4,128,1,float16,float16,0,3.3206832885742186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,4,128,1,fp8,fp8,0,2.6566560745239256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,24,128,1,float16,float16,0,1.6644767761230468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,24,128,1,float16,fp8,0,1.4459296226501466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,8,128,1,float16,fp8,0,2.7846927642822266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,24,128,1,fp8,fp8,0,1.4111056327819824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,8,128,1,float16,float16,0,3.3584320068359377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,1,128,1,float16,float16,0,1.7017328262329101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,24,8,128,1,fp8,fp8,0,2.8414031982421877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,1,128,1,float16,fp8,0,1.346668815612793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,1,128,1,fp8,fp8,0,1.369273567199707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,2,128,1,float16,float16,0,1.5413167953491211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,4,128,1,float16,float16,0,1.8063568115234374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,4,128,1,fp8,fp8,0,1.3521856307983398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,4,128,1,float16,fp8,0,1.4302191734313965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,8,128,1,float16,fp8,0,1.3523568153381347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,8,128,1,float16,float16,0,1.5859359741210937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,24,128,1,float16,fp8,0,0.7193535804748535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,24,128,1,float16,float16,0,0.9515904426574707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,24,128,1,fp8,fp8,0,0.7349855899810791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,8,128,1,fp8,fp8,0,1.4713727951049804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,1,128,1,float16,float16,0,0.7519904136657715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,1,128,1,float16,fp8,0,0.7993375778198242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,1,128,1,fp8,fp8,0,0.6612304210662842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,2,128,1,float16,fp8,0,0.687171220779419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,2,128,1,float16,float16,0,0.8577024459838867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,4,128,1,float16,float16,0,0.7682816028594971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,4,128,1,fp8,fp8,0,0.6659135818481445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,4,128,1,float16,fp8,0,0.74967360496521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,8,128,1,float16,float16,0,0.7777647972106934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,8,128,1,float16,fp8,0,0.7724031925201416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,24,128,1,float16,fp8,0,0.3737087965011597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,2,128,1,float16,fp8,0,1.3381296157836915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,8,128,1,fp8,fp8,0,0.6941359996795654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,24,128,1,fp8,fp8,0,0.4000400066375732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,1,128,1,float16,float16,0,0.40346240997314453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,1,128,1,float16,fp8,0,0.3418287992477417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,1,128,1,fp8,fp8,0,0.33389921188354493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,2,128,1,float16,float16,0,0.38836801052093506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,2,128,1,float16,fp8,0,0.3643775939941406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,2,128,1,fp8,fp8,0,0.3551520109176636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,4,128,1,float16,float16,0,0.3935312032699585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,4,128,1,float16,fp8,0,0.33801438808441164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,4,128,1,fp8,fp8,0,0.3493792057037354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,8,128,1,float16,fp8,0,0.35743041038513185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,8,128,1,fp8,fp8,0,0.34577438831329343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,24,2,128,1,fp8,fp8,0,0.693235206604004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,24,128,1,float16,float16,0,0.44664478302001953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,24,2,128,1,fp8,fp8,0,1.3275615692138671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,24,8,128,1,float16,float16,0,0.4044544219970703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,1,128,1,float16,fp8,0,4.879126358032226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,1,128,1,fp8,fp8,0,4.905868911743164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,1,128,1,float16,float16,0,6.2180320739746096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,2,128,1,float16,float16,0,6.367209625244141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,2,128,1,fp8,fp8,0,4.959008026123047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,4,128,1,float16,float16,0,6.290687942504883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,4,128,1,float16,fp8,0,4.984560012817383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,4,128,1,fp8,fp8,0,4.97685432434082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,2,128,1,float16,fp8,0,5.002377700805664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,24,128,1,float16,float16,0,3.430790328979492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,8,128,1,float16,fp8,0,5.300961685180664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,8,128,1,float16,float16,0,6.7953346252441404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,24,8,128,1,fp8,fp8,0,5.241191864013672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,1,128,1,float16,fp8,0,2.4364288330078123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,24,128,1,float16,fp8,0,2.7465152740478516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,24,128,1,fp8,fp8,0,2.6830944061279296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,1,128,1,float16,float16,0,2.9045711517333985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,1,128,1,fp8,fp8,0,2.546254348754883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,2,128,1,float16,float16,0,3.0209423065185548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,2,128,1,float16,fp8,0,2.6348400115966797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,2,128,1,fp8,fp8,0,2.474048042297363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,4,128,1,float16,fp8,0,2.478767967224121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,4,128,1,fp8,fp8,0,2.679190444946289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,4,128,1,float16,float16,0,3.015483283996582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,24,128,1,float16,float16,0,1.6449583053588868
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,24,128,1,float16,fp8,0,1.353439998626709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,8,128,1,float16,float16,0,3.1925775527954103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,8,128,1,fp8,fp8,0,2.5548879623413088
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,24,8,128,1,float16,fp8,0,2.674991989135742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,24,128,1,fp8,fp8,0,1.3502752304077148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,1,128,1,float16,float16,0,1.6042415618896484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,1,128,1,float16,fp8,0,1.3446271896362305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,1,128,1,fp8,fp8,0,1.220524787902832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,2,128,1,float16,fp8,0,1.2522015571594238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,2,128,1,float16,float16,0,1.5362208366394043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,2,128,1,fp8,fp8,0,1.2511712074279786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,4,128,1,float16,float16,0,1.4404911994934082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,4,128,1,fp8,fp8,0,1.2712271690368653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,4,128,1,float16,fp8,0,1.332595157623291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,24,128,1,float16,float16,0,0.8071680068969727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,8,128,1,float16,float16,0,1.4696031570434571
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,8,128,1,float16,fp8,0,1.2647071838378907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,24,128,1,fp8,fp8,0,0.6809567928314209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,1,128,1,float16,float16,0,0.7160831928253174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,24,8,128,1,fp8,fp8,0,1.3420592308044434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,1,128,1,float16,fp8,0,0.6990960121154786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,1,128,1,fp8,fp8,0,0.6751408100128173
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,2,128,1,float16,fp8,0,0.627948808670044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,2,128,1,float16,float16,0,0.7060848236083984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,2,128,1,fp8,fp8,0,0.6310463905334472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,4,128,1,float16,float16,0,0.7230607986450195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,4,128,1,float16,fp8,0,0.6634223937988282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,4,128,1,fp8,fp8,0,0.6522255897521972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,8,128,1,float16,fp8,0,0.6373280048370361
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,8,128,1,fp8,fp8,0,0.641867208480835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,24,128,1,float16,float16,0,0.4272287845611572
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,24,128,1,float16,fp8,0,0.35307040214538576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,1,128,1,fp8,fp8,0,0.3152944087982178
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,1,128,1,float16,float16,0,0.3598880052566528
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,2,128,1,float16,fp8,0,0.3218015909194946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,2,128,1,fp8,fp8,0,0.3170543909072876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,4,128,1,float16,float16,0,0.36600480079650877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,4,128,1,fp8,fp8,0,0.3167776107788086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,8,128,1,float16,float16,0,0.37312319278717043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,8,128,1,float16,fp8,0,0.32173120975494385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,24,128,1,float16,fp8,0,0.7514336109161377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,24,128,1,float16,float16,0,0.20941441059112548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,8,128,1,fp8,fp8,0,0.32551040649414065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,24,128,1,fp8,fp8,0,0.1772304058074951
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,1,128,1,float16,float16,0,0.18534560203552247
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,1,128,1,float16,fp8,0,0.16052960157394408
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,1,128,1,fp8,fp8,0,0.16160960197448732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,2,128,1,float16,float16,0,0.18649599552154542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,2,128,1,float16,fp8,0,0.16058720350265504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,2,128,1,fp8,fp8,0,0.16111040115356445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,4,128,1,float16,float16,0,0.18790719509124756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,4,128,1,float16,fp8,0,0.16288959980010986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,24,8,128,1,float16,float16,0,0.7396624088287354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,8,128,1,float16,float16,0,0.19253439903259278
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,8,128,1,float16,fp8,0,0.16652159690856932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,1,128,1,float16,fp8,0,0.32074239253997805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,2,128,1,float16,float16,0,0.3752880096435547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,8,128,1,fp8,fp8,0,0.165556800365448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,4,128,1,float16,fp8,0,0.320580792427063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,24,128,1,float16,fp8,0,0.17917920351028443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,1,128,1,float16,fp8,0,2.8578512191772463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,1,128,1,fp8,fp8,0,2.8854127883911134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,24,4,128,1,fp8,fp8,0,0.16350239515304565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,1,128,1,float16,float16,0,3.3742015838623045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,2,128,1,float16,fp8,0,2.9027168273925783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,24,24,128,1,fp8,fp8,0,0.3543616056442261
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,2,128,1,float16,float16,0,3.5950672149658205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,2,128,1,fp8,fp8,0,2.8943376541137695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,24,128,1,float16,float16,0,2.086569595336914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,4,128,1,float16,fp8,0,2.973164749145508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,4,128,1,float16,float16,0,3.609000015258789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,8,128,1,float16,fp8,0,3.033404731750488
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,24,128,1,float16,fp8,0,1.6338464736938476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,8,128,1,float16,float16,0,3.869715118408203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,24,128,1,fp8,fp8,0,1.736440086364746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,1,128,1,float16,float16,0,1.7153280258178711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,1,128,1,float16,fp8,0,1.5762720108032227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,2,128,1,float16,float16,0,1.645427131652832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,2,128,1,float16,fp8,0,1.5837087631225586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,2,128,1,fp8,fp8,0,1.486996841430664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,4,128,1,fp8,fp8,0,2.931219291687012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,4,128,1,float16,float16,0,1.8309040069580078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,4,128,1,fp8,fp8,0,1.5125087738037108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,24,8,128,1,fp8,fp8,0,3.000702476501465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,8,128,1,float16,float16,0,1.796504020690918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,24,128,1,float16,float16,0,1.0599472045898437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,8,128,1,fp8,fp8,0,1.5113776206970215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,24,128,1,fp8,fp8,0,0.8183216094970703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,1,128,1,float16,float16,0,0.9016544342041015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,1,128,1,fp8,fp8,0,1.4732144355773926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,1,128,1,float16,fp8,0,0.8160528182983399
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,1,128,1,fp8,fp8,0,0.7305967807769775
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,2,128,1,float16,float16,0,0.8291296005249024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,2,128,1,float16,fp8,0,0.804105567932129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,2,128,1,fp8,fp8,0,0.772972822189331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,4,128,1,float16,fp8,0,0.742465591430664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,4,128,1,float16,fp8,0,1.5036432266235351
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,4,128,1,fp8,fp8,0,0.7725247859954834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,8,128,1,float16,float16,0,0.8763584136962891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,8,128,1,float16,fp8,0,0.7568624019622803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,24,128,1,float16,float16,0,0.5042384147644043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,8,128,1,fp8,fp8,0,0.7559152126312256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,24,128,1,float16,fp8,0,0.45388641357421877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,24,128,1,fp8,fp8,0,0.41551361083984373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,1,128,1,float16,float16,0,0.4212975978851318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,1,128,1,float16,fp8,0,0.3688496112823486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,1,128,1,fp8,fp8,0,0.39397759437561036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,2,128,1,float16,float16,0,0.41915359497070315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,2,128,1,float16,fp8,0,0.37016479969024657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,2,128,1,fp8,fp8,0,0.37102398872375486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,4,128,1,float16,float16,0,0.44036159515380857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,4,128,1,float16,fp8,0,0.3777503967285156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,4,128,1,fp8,fp8,0,0.3763711929321289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,8,128,1,float16,float16,0,0.43721919059753417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,8,128,1,float16,fp8,0,0.3932287931442261
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,24,128,1,float16,float16,0,0.26044158935546874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,24,8,128,1,fp8,fp8,0,0.38427200317382815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,24,128,1,float16,fp8,0,0.21303999423980713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,24,128,1,fp8,fp8,0,0.2166896104812622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,1,128,1,float16,float16,0,0.21614398956298828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,1,128,1,float16,fp8,0,0.19009599685668946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,1,128,1,fp8,fp8,0,0.1906623959541321
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,2,128,1,float16,float16,0,0.21849439144134522
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,2,128,1,float16,fp8,0,0.1907039999961853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,2,128,1,fp8,fp8,0,0.19089920520782472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,4,128,1,float16,float16,0,0.8468079566955566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,4,128,1,float16,float16,0,0.22132480144500732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,4,128,1,float16,fp8,0,0.1936959981918335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,4,128,1,fp8,fp8,0,0.19334239959716798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,8,128,1,float16,fp8,0,0.19911520481109618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,24,128,1,float16,fp8,0,0.11110559701919556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,8,128,1,fp8,fp8,0,0.1990880012512207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,24,128,1,fp8,fp8,0,0.1123136043548584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,1,128,1,float16,float16,0,0.11270079612731934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,1,128,1,float16,fp8,0,0.10087679624557495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,1,128,1,fp8,fp8,0,0.10146399736404418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,2,128,1,float16,float16,0,0.11397279500961303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,2,128,1,float16,fp8,0,0.10140479803085327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,2,128,1,fp8,fp8,0,0.10104479789733886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,4,128,1,float16,float16,0,0.11763039827346802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,4,128,1,float16,fp8,0,0.10084799528121949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,4,128,1,fp8,fp8,0,0.1015056014060974
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,8,128,1,float16,float16,0,0.12138559818267822
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,8,128,1,float16,fp8,0,0.10277760028839111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,8,128,1,fp8,fp8,0,0.10333919525146484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,24,8,128,1,float16,fp8,0,1.5126784324645997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,24,8,128,1,float16,float16,0,0.22506558895111084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,24,24,128,1,float16,float16,0,0.13866879940032958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,1,128,1,float16,fp8,0,2.8284879684448243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,1,128,1,fp8,fp8,0,2.821892738342285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,1,128,1,float16,float16,0,3.1702543258666993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,2,128,1,float16,float16,0,3.3941055297851563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,24,24,128,1,float16,fp8,0,0.8297183990478516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,2,128,1,fp8,fp8,0,2.8138288497924804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,4,128,1,float16,fp8,0,2.888158416748047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,4,128,1,fp8,fp8,0,2.8944591522216796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,4,128,1,float16,float16,0,3.4567760467529296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,2,128,1,float16,fp8,0,2.826055908203125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,8,128,1,fp8,fp8,0,2.9570655822753906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,8,128,1,float16,float16,0,3.6568767547607424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,24,128,1,float16,float16,0,1.9846015930175782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,24,128,1,float16,fp8,0,1.6489328384399413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,24,128,1,fp8,fp8,0,1.6586015701293946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,1,128,1,float16,float16,0,1.7312639236450196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,1,128,1,float16,fp8,0,1.4113759994506836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,1,128,1,fp8,fp8,0,1.4088064193725587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,2,128,1,float16,float16,0,1.7249839782714844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,2,128,1,fp8,fp8,0,1.429635238647461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,2,128,1,float16,fp8,0,1.5100848197937011
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,4,128,1,float16,float16,0,1.682124710083008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,4,128,1,float16,fp8,0,1.5675840377807617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,4,128,1,fp8,fp8,0,1.4462080001831055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,8,128,1,float16,float16,0,1.6923200607299804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,24,8,128,1,float16,fp8,0,3.0745967864990233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,8,128,1,float16,fp8,0,1.5782976150512695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,24,128,1,float16,float16,0,0.99136323928833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,24,128,1,float16,fp8,0,0.9023887634277343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,24,128,1,fp8,fp8,0,0.8499567985534668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,1,128,1,float16,float16,0,0.8023872375488281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,24,8,128,1,fp8,fp8,0,1.4899120330810547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,1,128,1,float16,fp8,0,0.7164383888244629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,1,128,1,fp8,fp8,0,0.7100111961364746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,2,128,1,float16,float16,0,0.8128751754760742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,2,128,1,float16,fp8,0,0.7170623779296875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,2,128,1,fp8,fp8,0,0.7489808082580567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,4,128,1,float16,fp8,0,0.7302879810333252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,4,128,1,float16,float16,0,0.8275584220886231
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,4,128,1,fp8,fp8,0,0.7310624122619629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,24,128,1,float16,fp8,0,0.43195199966430664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,24,128,1,float16,float16,0,0.500648021697998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,8,128,1,float16,fp8,0,0.74726881980896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,1,128,1,float16,float16,0,0.43236799240112306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,1,128,1,float16,fp8,0,0.3612303972244263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,1,128,1,fp8,fp8,0,0.3607392072677612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,2,128,1,float16,float16,0,0.40891199111938475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,2,128,1,float16,fp8,0,0.388372802734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,4,128,1,float16,float16,0,0.41798081398010256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,4,128,1,float16,fp8,0,0.36945281028747556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,4,128,1,fp8,fp8,0,0.3826319932937622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,8,128,1,float16,float16,0,0.43401279449462893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,8,128,1,float16,fp8,0,0.37887039184570315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,8,128,1,fp8,fp8,0,0.3882512092590332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,24,128,1,float16,float16,0,0.25897440910339353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,24,128,1,fp8,fp8,0,0.21908318996429443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,1,128,1,float16,float16,0,0.20942881107330322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,1,128,1,float16,fp8,0,0.18538719415664673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,1,128,1,fp8,fp8,0,0.1859055995941162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,2,128,1,float16,float16,0,0.210265588760376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,2,128,1,float16,fp8,0,0.1868559956550598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,2,128,1,fp8,fp8,0,0.18628640174865724
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,4,128,1,float16,float16,0,0.21547360420227052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,4,128,1,float16,fp8,0,0.18892960548400878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,8,128,1,float16,float16,0,0.8588607788085938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,8,128,1,float16,float16,0,0.2223423957824707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,24,8,128,1,fp8,fp8,0,0.8049776077270507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,8,128,1,float16,fp8,0,0.1989456057548523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,8,128,1,fp8,fp8,0,0.19420160055160524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,24,128,1,float16,float16,0,0.1345968008041382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,24,128,1,float16,fp8,0,0.11529439687728882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,2,128,1,fp8,fp8,0,0.3635871887207031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,1,128,1,float16,float16,0,0.10989919900894166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,1,128,1,fp8,fp8,0,0.09695839881896973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,2,128,1,float16,float16,0,0.11101919412612915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,2,128,1,float16,fp8,0,0.0968720018863678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,2,128,1,fp8,fp8,0,0.09674400091171265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,4,128,1,float16,float16,0,0.11330080032348633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,4,128,1,float16,fp8,0,0.09928799867630005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,4,128,1,fp8,fp8,0,0.09963679909706116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,8,128,1,float16,float16,0,0.1178096055984497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,8,128,1,float16,fp8,0,0.10258079767227173
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,8,128,1,fp8,fp8,0,0.10257760286331177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,24,128,1,float16,fp8,0,0.21753280162811278
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,24,128,1,float16,float16,0,0.08014400005340576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,24,128,1,float16,fp8,0,0.06554080247879028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,24,128,1,fp8,fp8,0,0.06565120220184326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,1,128,1,float16,float16,0,0.059331202507019044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,1,128,1,float16,fp8,0,0.053776001930236815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,2,128,1,float16,float16,0,0.059849601984024045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,2,128,1,float16,fp8,0,0.05375199913978577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,2,128,1,fp8,fp8,0,0.054225599765777587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,4,128,1,float16,float16,0,0.059988802671432494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,4,128,1,float16,fp8,0,0.054446399211883545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,4,128,1,fp8,fp8,0,0.054281598329544066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,8,128,1,float16,float16,0,0.06345599889755249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,8,128,1,float16,fp8,0,0.054902398586273195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,8,128,1,fp8,fp8,0,0.05511040091514587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,24,24,128,1,fp8,fp8,0,0.4222095966339111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,24,128,1,fp8,fp8,0,0.11408640146255493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,24,1,128,1,float16,fp8,0,0.09735839962959289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,1,128,1,float16,fp8,0,1.7292671203613281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,1,128,1,float16,float16,0,1.9653263092041016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,1,128,1,fp8,fp8,0,1.7418272018432617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,24,1,128,1,fp8,fp8,0,0.0538640022277832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,2,128,1,float16,fp8,0,1.757441520690918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,2,128,1,float16,float16,0,1.9949728012084962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,2,128,1,fp8,fp8,0,1.7407087326049804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,24,4,128,1,fp8,fp8,0,0.19034719467163086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,4,128,1,float16,fp8,0,1.797755241394043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,4,128,1,fp8,fp8,0,1.7922399520874024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,8,128,1,float16,float16,0,2.1014272689819338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,24,128,1,float16,float16,0,1.2627152442932128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,24,128,1,float16,fp8,0,1.0476655960083008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,24,128,1,fp8,fp8,0,1.0560511589050292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,8,128,1,fp8,fp8,0,1.8497760772705079
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,1,128,1,float16,float16,0,0.972537612915039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,1,128,1,float16,fp8,0,0.9750736236572266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,2,128,1,float16,fp8,0,0.882084846496582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,2,128,1,float16,float16,0,0.9756272315979004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,2,128,1,fp8,fp8,0,0.8821855545043945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,4,128,1,float16,float16,0,1.0033087730407715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,4,128,1,fp8,fp8,0,0.901854419708252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,4,128,1,float16,fp8,0,0.9606719970703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,4,128,1,float16,float16,0,2.1804784774780273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,8,128,1,float16,float16,0,1.054145622253418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,8,128,1,fp8,fp8,0,0.9723423957824707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,24,128,1,float16,fp8,0,0.539572811126709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,24,128,1,float16,float16,0,0.6371888160705567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,1,128,1,float16,float16,0,0.49233441352844237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,1,128,1,float16,fp8,0,0.45157442092895506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,1,128,1,fp8,fp8,0,0.4466271877288818
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,2,128,1,float16,float16,0,0.4973951816558838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,2,128,1,float16,fp8,0,0.44678077697753904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,2,128,1,fp8,fp8,0,0.4545296192169189
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,4,128,1,float16,float16,0,0.5103839874267578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,4,128,1,float16,fp8,0,0.45653600692749025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,4,128,1,fp8,fp8,0,0.45719518661499026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,8,128,1,float16,float16,0,0.5349264144897461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,24,128,1,float16,float16,0,0.32325599193572996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,1,128,1,fp8,fp8,0,0.922481632232666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,8,128,1,float16,fp8,0,0.4753856182098389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,8,128,1,fp8,fp8,0,0.47241601943969724
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,24,128,1,float16,fp8,0,0.27765278816223143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,24,128,1,fp8,fp8,0,0.27619519233703616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,1,128,1,float16,float16,0,0.2528223991394043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,1,128,1,float16,fp8,0,0.22903199195861818
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,1,128,1,fp8,fp8,0,0.22797279357910155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,2,128,1,float16,float16,0,0.2550544023513794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,2,128,1,float16,fp8,0,0.23005599975585939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,2,128,1,fp8,fp8,0,0.2290463924407959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,4,128,1,float16,float16,0,0.26132800579071047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,24,8,128,1,float16,fp8,0,0.9327103614807128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,4,128,1,float16,fp8,0,0.23559200763702393
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,4,128,1,fp8,fp8,0,0.2348112106323242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,8,128,1,float16,float16,0,0.2727247953414917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,8,128,1,float16,fp8,0,0.2420815944671631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,24,8,128,1,fp8,fp8,0,0.24347999095916747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,24,128,1,float16,fp8,0,0.1464463949203491
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,24,128,1,fp8,fp8,0,0.1463919997215271
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,1,128,1,float16,float16,0,0.1334928035736084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,1,128,1,float16,fp8,0,0.11976799964904786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,1,128,1,fp8,fp8,0,0.11942880153656006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,2,128,1,float16,float16,0,0.13462400436401367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,24,8,128,1,float16,fp8,0,2.0001440048217773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,2,128,1,float16,fp8,0,0.12035679817199707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,2,128,1,fp8,fp8,0,0.12009600400924683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,4,128,1,float16,float16,0,0.13797119855880738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,4,128,1,fp8,fp8,0,0.12206720113754273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,8,128,1,float16,float16,0,0.1435871958732605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,4,128,1,float16,fp8,0,0.1225119948387146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,24,128,1,float16,fp8,0,0.08263199925422668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,8,128,1,fp8,fp8,0,0.12641439437866211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,24,128,1,fp8,fp8,0,0.08281919956207276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,1,128,1,float16,float16,0,0.06985440254211425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,1,128,1,float16,fp8,0,0.06355199813842774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,1,128,1,fp8,fp8,0,0.0629584014415741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,2,128,1,float16,float16,0,0.07192320227622986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,2,128,1,float16,fp8,0,0.06320160031318664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,2,128,1,fp8,fp8,0,0.06364960074424744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,4,128,1,float16,float16,0,0.07446399927139283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,4,128,1,float16,fp8,0,0.06458079814910889
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,4,128,1,fp8,fp8,0,0.06439200043678284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,8,128,1,float16,float16,0,0.07825599908828736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,8,128,1,float16,fp8,0,0.06699360013008118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,8,128,1,fp8,fp8,0,0.06754559874534607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,24,128,1,float16,float16,0,0.05582879781723023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,24,128,1,float16,fp8,0,0.04548639953136444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,24,128,1,fp8,fp8,0,0.04521439969539642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,1,128,1,float16,float16,0,0.04054880142211914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,1,128,1,float16,fp8,0,0.037561601400375365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,2,128,1,float16,float16,0,0.04070239961147308
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,2,128,1,float16,fp8,0,0.03772799968719483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,2,128,1,fp8,fp8,0,0.03756319880485535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,4,128,1,float16,float16,0,0.04126240015029907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,24,128,1,float16,float16,0,0.1696895956993103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,4,128,1,float16,fp8,0,0.03787519931793213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,4,128,1,fp8,fp8,0,0.03808000087738037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,8,128,1,float16,float16,0,0.042222398519515994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,24,24,128,1,fp8,fp8,0,0.5693727970123291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,8,128,1,float16,fp8,0,0.038366401195526124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,24,8,128,1,float16,fp8,0,0.12780159711837769
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,24,24,128,1,float16,float16,0,0.0907535970211029
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,1,128,1,float16,fp8,0,1.8383760452270508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,1,128,1,float16,float16,0,2.0052751541137694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,1,128,1,fp8,fp8,0,0.03774079978466034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,1,128,1,fp8,fp8,0,1.8436304092407227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,2,128,1,float16,float16,0,2.0217119216918946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,24,8,128,1,fp8,fp8,0,0.0381520003080368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,2,128,1,float16,fp8,0,1.8483583450317382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,2,128,1,fp8,fp8,0,1.83776798248291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,4,128,1,float16,float16,0,2.119094467163086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,4,128,1,float16,fp8,0,1.8737152099609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,4,128,1,fp8,fp8,0,1.9013151168823241
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,24,128,1,float16,float16,0,1.3866559982299804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,24,128,1,float16,fp8,0,1.1546544075012206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,8,128,1,float16,fp8,0,1.9738143920898437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,8,128,1,fp8,fp8,0,1.9694448471069337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,24,8,128,1,float16,float16,0,2.2976943969726564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,24,128,1,fp8,fp8,0,1.2164447784423829
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,1,128,1,float16,float16,0,0.9947423934936523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,1,128,1,float16,fp8,0,0.9240048408508301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,1,128,1,fp8,fp8,0,0.9264911651611328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,2,128,1,float16,float16,0,1.0111583709716796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,2,128,1,float16,fp8,0,0.9230671882629394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,2,128,1,fp8,fp8,0,0.9209919929504394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,4,128,1,float16,fp8,0,0.9692879676818847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,4,128,1,float16,float16,0,1.144164752960205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,4,128,1,fp8,fp8,0,0.9808639526367188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,8,128,1,float16,float16,0,1.109278392791748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,8,128,1,float16,fp8,0,0.991759967803955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,24,128,1,float16,fp8,0,0.5857247829437255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,24,128,1,fp8,fp8,0,0.5929008007049561
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,1,128,1,float16,fp8,0,0.4747151851654053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,1,128,1,float16,float16,0,0.5432271957397461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,24,8,128,1,fp8,fp8,0,0.9930000305175781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,1,128,1,fp8,fp8,0,0.48909759521484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,2,128,1,float16,float16,0,0.513212776184082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,2,128,1,float16,fp8,0,0.46869921684265137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,2,128,1,fp8,fp8,0,0.4725632190704346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,4,128,1,float16,fp8,0,0.4998976230621338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,4,128,1,float16,float16,0,0.5333392143249511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,24,128,1,float16,float16,0,0.3597759962081909
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,24,128,1,float16,fp8,0,0.30718240737915037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,8,128,1,fp8,fp8,0,0.5024415969848632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,8,128,1,float16,float16,0,0.5714672088623047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,24,128,1,fp8,fp8,0,0.3028127908706665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,1,128,1,float16,float16,0,0.26530559062957765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,1,128,1,float16,fp8,0,0.23901760578155518
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,1,128,1,fp8,fp8,0,0.24148800373077392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,2,128,1,float16,float16,0,0.26544320583343506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,2,128,1,float16,fp8,0,0.24384639263153077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,2,128,1,fp8,fp8,0,0.2403775930404663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,4,128,1,float16,fp8,0,0.24878239631652832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,4,128,1,float16,float16,0,0.272761607170105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,4,128,1,fp8,fp8,0,0.24909439086914062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,8,128,1,float16,float16,0,0.2872783899307251
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,8,128,1,float16,fp8,0,0.25998079776763916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,24,128,1,float16,float16,0,0.1900431990623474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,24,8,128,1,fp8,fp8,0,0.25934720039367676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,24,128,1,float16,fp8,0,0.15849440097808837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,1,128,1,float16,float16,0,0.1383952021598816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,1,128,1,float16,fp8,0,0.12651679515838624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,1,128,1,fp8,fp8,0,0.12543200254440307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,24,128,1,float16,float16,0,0.6955520153045655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,2,128,1,float16,float16,0,0.14010239839553834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,2,128,1,fp8,fp8,0,0.12645920515060424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,4,128,1,float16,fp8,0,0.13061599731445311
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,4,128,1,float16,float16,0,0.14324480295181274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,4,128,1,fp8,fp8,0,0.1293328046798706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,8,128,1,float16,float16,0,0.15217759609222412
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,4,128,1,fp8,fp8,0,0.4809391975402832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,8,128,1,fp8,fp8,0,0.13605600595474243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,24,128,1,float16,fp8,0,0.08786399960517884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,24,128,1,float16,float16,0,0.10288959741592407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,24,128,1,fp8,fp8,0,0.08808479905128479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,1,128,1,float16,float16,0,0.07523040175437927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,24,8,128,1,float16,fp8,0,0.5031968116760254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,2,128,1,float16,float16,0,0.07631999850273133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,2,128,1,float16,fp8,0,0.06663519740104676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,2,128,1,fp8,fp8,0,0.06706719994544982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,4,128,1,float16,float16,0,0.07817280292510986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,4,128,1,float16,fp8,0,0.0691424012184143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,4,128,1,fp8,fp8,0,0.06862879991531372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,8,128,1,float16,float16,0,0.08239840269088745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,8,128,1,float16,fp8,0,0.07377920150756836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,24,128,1,float16,float16,0,0.06001920104026794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,24,128,1,float16,fp8,0,0.05187360048294067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,24,128,1,fp8,fp8,0,0.0515936017036438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,1,128,1,float16,float16,0,0.03958880007266998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,1,128,1,float16,fp8,0,0.03812319934368134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,1,128,1,fp8,fp8,0,0.037513598799705505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,2,128,1,float16,float16,0,0.03946079909801483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,2,128,1,float16,fp8,0,0.03767040073871612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,2,128,1,fp8,fp8,0,0.0377375990152359
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,24,128,1,fp8,fp8,0,0.16003040075302125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,4,128,1,float16,fp8,0,0.03925279974937439
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,2,128,1,float16,fp8,0,0.12698719501495362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,4,128,1,fp8,fp8,0,0.03930720090866089
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,8,128,1,float16,float16,0,0.04378400146961212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,8,128,1,float16,fp8,0,0.040227198600769044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,8,128,1,fp8,fp8,0,0.04011679887771606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,24,128,1,float16,float16,0,0.032652801275253295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,24,128,1,float16,fp8,0,0.029249599575996398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,24,128,1,fp8,fp8,0,0.029177600145339967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,1,128,1,float16,float16,0,0.023577600717544556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,1,128,1,float16,fp8,0,0.022782400250434875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,1,128,1,fp8,fp8,0,0.02253279983997345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,2,128,1,float16,float16,0,0.023844799399375914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,2,128,1,float16,fp8,0,0.022732800245285033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,2,128,1,fp8,fp8,0,0.022784000635147093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,4,128,1,float16,float16,0,0.024014399945735933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,4,128,1,float16,fp8,0,0.024398399889469145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,4,128,1,fp8,fp8,0,0.02438880056142807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,8,128,1,float16,float16,0,0.024926400184631346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,8,128,1,float16,fp8,0,0.024607999622821806
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,24,8,128,1,fp8,fp8,0,0.02493920028209686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,8,128,1,fp8,fp8,0,0.0741375982761383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,1,128,1,float16,float16,0,1.4045503616333008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,24,4,128,1,float16,float16,0,0.040747201442718504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,1,128,1,fp8,fp8,0,1.350268840789795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,2,128,1,float16,float16,0,1.428006362915039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,24,8,128,1,float16,fp8,0,0.1354864001274109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,2,128,1,float16,fp8,0,1.3630335807800293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,1,128,1,float16,fp8,0,0.06750079989433289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,2,128,1,fp8,fp8,0,1.3524224281311035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,4,128,1,float16,float16,0,1.4793871879577636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,4,128,1,float16,fp8,0,1.4234527587890624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,4,128,1,fp8,fp8,0,1.3999792098999024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,8,128,1,float16,fp8,0,1.4920656204223632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,8,128,1,fp8,fp8,0,1.4921695709228515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,24,128,1,float16,float16,0,1.1018367767333985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,1,128,1,float16,fp8,0,1.358409595489502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,24,128,1,float16,fp8,0,0.9506048202514649
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,24,128,1,fp8,fp8,0,0.9364784240722657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,1,128,1,float16,float16,0,0.7043263912200928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,1,128,1,float16,fp8,0,0.6816304206848145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,2,128,1,float16,float16,0,0.7138495922088623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,1,128,1,fp8,fp8,0,0.6928976058959961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,2,128,1,float16,fp8,0,0.6871551990509033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,2,128,1,fp8,fp8,0,0.6842944145202636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,24,1,128,1,fp8,fp8,0,0.06675360202789307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,4,128,1,float16,fp8,0,0.7063072204589844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,4,128,1,fp8,fp8,0,0.704963207244873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,8,128,1,float16,float16,0,0.8142576217651367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,8,128,1,float16,fp8,0,0.7570335865020752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,8,128,1,fp8,fp8,0,0.7556191921234131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,24,128,1,float16,float16,0,0.5510863780975341
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,24,128,1,float16,fp8,0,0.474835205078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,1,128,1,float16,float16,0,0.3611407995223999
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,24,128,1,fp8,fp8,0,0.4773392200469971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,1,128,1,float16,fp8,0,0.3483648061752319
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,1,128,1,fp8,fp8,0,0.3471519947052002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,2,128,1,float16,fp8,0,0.3497920036315918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,2,128,1,fp8,fp8,0,0.35062880516052247
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,4,128,1,float16,float16,0,0.3814912080764771
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,24,8,128,1,float16,float16,0,1.619476890563965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,4,128,1,float16,fp8,0,0.358951997756958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,4,128,1,fp8,fp8,0,0.36185920238494873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,8,128,1,float16,float16,0,0.4136496067047119
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,8,128,1,float16,fp8,0,0.3846127986907959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,24,128,1,float16,float16,0,0.2830080032348633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,8,128,1,fp8,fp8,0,0.3861936092376709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,1,128,1,float16,float16,0,0.18712960481643676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,24,128,1,fp8,fp8,0,0.24594240188598632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,1,128,1,float16,fp8,0,0.1789423942565918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,1,128,1,fp8,fp8,0,0.1802880048751831
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,2,128,1,float16,float16,0,0.19060800075531006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,2,128,1,float16,fp8,0,0.1808735966682434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,2,128,1,fp8,fp8,0,0.17949440479278564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,4,128,1,float16,float16,0,0.19801759719848633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,4,128,1,fp8,fp8,0,0.18756480216979982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,8,128,1,float16,float16,0,0.21165759563446046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,8,128,1,float16,fp8,0,0.19967039823532104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,8,128,1,fp8,fp8,0,0.20013279914855958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,24,4,128,1,float16,float16,0,0.745689582824707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,24,128,1,float16,fp8,0,0.1304527997970581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,24,128,1,fp8,fp8,0,0.13027679920196533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,1,128,1,float16,fp8,0,0.09602400064468383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,1,128,1,float16,float16,0,0.0998960018157959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,1,128,1,fp8,fp8,0,0.0959007978439331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,2,128,1,float16,float16,0,0.10218559503555298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,2,128,1,float16,fp8,0,0.09623519778251648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,2,128,1,fp8,fp8,0,0.09613440036773682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,4,128,1,float16,float16,0,0.10662399530410767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,4,128,1,float16,fp8,0,0.0990880012512207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,4,128,1,fp8,fp8,0,0.09955040216445923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,8,128,1,float16,float16,0,0.1141759991645813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,8,128,1,float16,fp8,0,0.10554239749908448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,24,128,1,float16,float16,0,0.08273280262947083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,8,128,1,fp8,fp8,0,0.10554879903793335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,24,128,1,fp8,fp8,0,0.07232480049133301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,1,128,1,float16,float16,0,0.05622239708900452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,1,128,1,float16,fp8,0,0.05119680166244507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,1,128,1,fp8,fp8,0,0.05133119821548462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,2,128,1,float16,float16,0,0.057545602321624756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,2,128,1,float16,fp8,0,0.05215039849281311
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,2,128,1,fp8,fp8,0,0.05197759866714478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,4,128,1,float16,float16,0,0.05936639904975891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,4,128,1,float16,fp8,0,0.054756802320480344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,4,128,1,fp8,fp8,0,0.05467360019683838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,8,128,1,float16,float16,0,0.06350240111351013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,8,128,1,float16,fp8,0,0.05915200114250183
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,8,128,1,fp8,fp8,0,0.05918239951133728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,24,128,1,float16,fp8,0,0.04188160002231598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,24,128,1,float16,float16,0,0.04822559952735901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,24,128,1,float16,fp8,0,0.24680640697479247
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,24,128,1,fp8,fp8,0,0.04199680089950562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,1,128,1,float16,float16,0,0.0310591995716095
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,1,128,1,float16,fp8,0,0.02959359884262085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,1,128,1,fp8,fp8,0,0.02972320020198822
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,2,128,1,float16,fp8,0,0.029556798934936523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,2,128,1,fp8,fp8,0,0.029686400294303895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,4,128,1,float16,float16,0,0.032183998823165895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,4,128,1,float16,fp8,0,0.031041601300239564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,4,128,1,fp8,fp8,0,0.031327998638153075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,8,128,1,float16,float16,0,0.03586879968643188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,24,4,128,1,float16,fp8,0,0.18706239461898805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,8,128,1,float16,fp8,0,0.032128000259399415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,8,128,1,fp8,fp8,0,0.031851199269294736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,24,128,1,float16,float16,0,0.027713599801063537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,24,128,1,float16,fp8,0,0.026438400149345398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,24,128,1,fp8,fp8,0,0.026155200600624085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,1,128,1,float16,float16,0,0.020452800393104553
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,1,128,1,fp8,fp8,0,0.01926880031824112
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,2,128,1,float16,float16,0,0.020662400126457214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,2,128,1,float16,fp8,0,0.01937599927186966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,4,128,1,float16,float16,0,0.02073120027780533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,4,128,1,float16,fp8,0,0.02144159972667694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,4,128,1,fp8,fp8,0,0.02125599980354309
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,8,128,1,float16,float16,0,0.02140959948301315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,8,128,1,float16,fp8,0,0.021879999339580535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,8,128,1,fp8,fp8,0,0.021660800278186797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,24,128,1,float16,float16,0,0.01855359971523285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,24,128,1,float16,fp8,0,0.01835999935865402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,24,128,1,fp8,fp8,0,0.018432000279426576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,1,128,1,float16,float16,0,0.01693439930677414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,1,128,1,float16,fp8,0,0.01711200028657913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,24,2,128,1,float16,float16,0,0.3648335933685303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,1,128,1,fp8,fp8,0,0.01706880033016205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,2,128,1,float16,float16,0,0.016966399550437928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,2,128,1,float16,fp8,0,0.01688639968633652
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,2,128,1,fp8,fp8,0,0.017209599912166595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,4,128,1,float16,float16,0,0.017003199458122252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,4,128,1,float16,fp8,0,0.017393599450588226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,4,128,1,fp8,fp8,0,0.017417599260807038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,8,128,1,float16,float16,0,0.017377600073814392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,8,128,1,float16,fp8,0,0.01757279932498932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,24,8,128,1,fp8,fp8,0,0.01746399998664856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,24,2,128,1,float16,float16,0,0.03161759972572327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,1,128,1,float16,float16,0,0.5642144203186035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,1,128,1,float16,fp8,0,0.5697951793670655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,1,128,1,fp8,fp8,0,0.5699696063995361
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,1,128,1,float16,fp8,0,0.01945279985666275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,24,24,128,1,float16,float16,0,0.14877920150756835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,2,128,1,float16,float16,0,0.5747968196868897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,2,128,1,float16,fp8,0,0.571288013458252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,2,128,1,fp8,fp8,0,0.5714191913604736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,4,128,1,float16,float16,0,0.607377576828003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,4,128,1,float16,fp8,0,0.5948815822601319
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,8,128,1,float16,float16,0,0.6729472160339356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,8,128,1,float16,fp8,0,0.6426623821258545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,8,128,1,fp8,fp8,0,0.6433504104614258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,24,128,1,float16,float16,0,0.480401611328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,24,128,1,float16,fp8,0,0.40194239616394045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,24,128,1,fp8,fp8,0,0.40106401443481443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,1,128,1,float16,fp8,0,0.29110078811645507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,1,128,1,fp8,fp8,0,0.291756796836853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,2,128,1,float16,float16,0,0.2943311929702759
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,2,128,1,float16,fp8,0,0.2922735929489136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,2,128,1,fp8,fp8,0,0.2928719997406006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,4,128,1,float16,float16,0,0.3100143909454346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,4,128,1,float16,fp8,0,0.30400800704956055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,4,128,1,fp8,fp8,0,0.304803204536438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,8,128,1,float16,float16,0,0.3433536052703857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,8,128,1,float16,fp8,0,0.3282111883163452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,8,128,1,fp8,fp8,0,0.32842719554901123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,24,2,128,1,fp8,fp8,0,0.019462400674819948
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,24,128,1,float16,float16,0,0.24845919609069825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,24,128,1,float16,fp8,0,0.20779039859771728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,24,128,1,fp8,fp8,0,0.20757439136505126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,1,128,1,float16,float16,0,0.1517135977745056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,1,128,1,float16,fp8,0,0.15219199657440186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,1,128,1,fp8,fp8,0,0.15200480222702026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,2,128,1,float16,float16,0,0.1541152000427246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,2,128,1,float16,fp8,0,0.15297119617462157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,2,128,1,fp8,fp8,0,0.15267839431762695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,4,128,1,float16,float16,0,0.16210559606552125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,4,128,1,float16,fp8,0,0.15886080265045166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,24,4,128,1,fp8,fp8,0,0.59585599899292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,4,128,1,fp8,fp8,0,0.15934560298919678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,24,24,128,1,float16,fp8,0,0.07165600061416626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,24,128,1,float16,fp8,0,0.11077120304107665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,24,128,1,float16,float16,0,0.1319551944732666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,24,128,1,fp8,fp8,0,0.1109328031539917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,1,128,1,float16,float16,0,0.08176000118255615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,1,128,1,float16,fp8,0,0.08236479759216309
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,2,128,1,float16,float16,0,0.08377439975738525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,2,128,1,fp8,fp8,0,0.08254560232162475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,4,128,1,float16,float16,0,0.08782079815864563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,24,1,128,1,float16,float16,0,0.28902881145477294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,4,128,1,float16,fp8,0,0.08550400137901307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,8,128,1,float16,fp8,0,0.09161760210990906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,8,128,1,float16,float16,0,0.09624639749526978
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,8,128,1,fp8,fp8,0,0.09187039732933044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,24,128,1,float16,float16,0,0.07333920001983643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,24,128,1,float16,fp8,0,0.06180800199508667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,24,128,1,fp8,fp8,0,0.06187679767608643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,1,128,1,float16,float16,0,0.04673120081424713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,1,128,1,float16,fp8,0,0.044521600008010864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,1,128,1,fp8,fp8,0,0.04442879855632782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,2,128,1,float16,float16,0,0.048456001281738284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,2,128,1,fp8,fp8,0,0.045291200280189514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,4,128,1,float16,float16,0,0.050425601005554196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,4,128,1,float16,fp8,0,0.04722239971160889
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,4,128,1,fp8,fp8,0,0.04721119999885559
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,8,128,1,float16,float16,0,0.05455679893493652
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,8,128,1,float16,fp8,0,0.052215999364852904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,8,128,1,fp8,fp8,0,0.051876801252365115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,24,128,1,float16,float16,0,0.04203679859638214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,24,128,1,float16,fp8,0,0.03521760106086731
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,24,128,1,fp8,fp8,0,0.035390400886535646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,1,128,1,float16,float16,0,0.02675839960575104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,1,128,1,float16,fp8,0,0.026342400908470155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,8,128,1,float16,float16,0,0.17852959632873536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,1,128,1,fp8,fp8,0,0.02627359926700592
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,8,128,1,fp8,fp8,0,0.17097439765930175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,2,128,1,float16,float16,0,0.026731199026107787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,2,128,1,fp8,fp8,0,0.02624959945678711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,1,128,1,fp8,fp8,0,0.08243839740753174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,4,128,1,float16,float16,0,0.02714880108833313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,4,128,1,float16,fp8,0,0.026627200841903686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,2,128,1,float16,fp8,0,0.0823632001876831
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,4,128,1,fp8,fp8,0,0.026547199487686156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,8,128,1,float16,float16,0,0.030675199627876282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,24,128,1,float16,float16,0,0.023203200101852416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,24,128,1,float16,fp8,0,0.02144320011138916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,24,4,128,1,fp8,fp8,0,0.08586080074310302
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,24,128,1,fp8,fp8,0,0.02128159999847412
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,1,128,1,float16,fp8,0,0.019670400023460387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,1,128,1,fp8,fp8,0,0.019131200015544893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,2,128,1,float16,float16,0,0.018691200017929076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,24,2,128,1,float16,fp8,0,0.04537439942359924
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,2,128,1,fp8,fp8,0,0.019414399564266206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,4,128,1,float16,float16,0,0.019089600443840025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,4,128,1,float16,fp8,0,0.019704000651836397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,4,128,1,fp8,fp8,0,0.019756799936294554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,8,128,1,float16,float16,0,0.01968639940023422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,8,128,1,float16,fp8,0,0.019857600331306458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,8,128,1,fp8,fp8,0,0.01987680047750473
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,24,128,1,float16,float16,0,0.01578560024499893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,24,128,1,float16,fp8,0,0.015503999590873719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,24,128,1,fp8,fp8,0,0.015667200088500977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,1,128,1,float16,float16,0,0.014132800698280334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,1,128,1,float16,fp8,0,0.014395199716091156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,1,128,1,fp8,fp8,0,0.014499199390411378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,2,128,1,float16,float16,0,0.014120000600814819
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,24,8,128,1,float16,fp8,0,0.17058080434799194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,2,128,1,float16,fp8,0,0.014459200203418732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,2,128,1,fp8,fp8,0,0.014256000518798828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,2,128,1,float16,fp8,0,0.026185598969459534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,4,128,1,float16,fp8,0,0.014483200013637542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,4,128,1,fp8,fp8,0,0.014587199687957764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,8,128,1,float16,float16,0,0.01451359987258911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,8,128,1,fp8,fp8,0,0.014636799693107605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,24,128,1,float16,float16,0,0.014716799557209014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,8,128,1,fp8,fp8,0,0.0286655992269516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,24,128,1,float16,fp8,0,0.014982399344444276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,1,128,1,float16,float16,0,0.018409599363803864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,24,128,1,fp8,fp8,0,0.014692799746990204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,1,128,1,float16,float16,0,0.013553600013256072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,1,128,1,float16,fp8,0,0.013851200044155122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,2,128,1,float16,float16,0,0.013512000441551208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,24,2,128,1,float16,fp8,0,0.019020800292491914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,2,128,1,fp8,fp8,0,0.013910399377346038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,4,128,1,float16,float16,0,0.013758400082588195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,4,128,1,float16,fp8,0,0.014220799505710601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,4,128,1,fp8,fp8,0,0.014281600713729858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,8,128,1,float16,float16,0,0.014001600444316864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,8,128,1,fp8,fp8,0,0.014379200339317322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,1,128,1,float16,float16,0,0.2713615894317627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,1,128,1,float16,fp8,0,0.28385438919067385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,1,128,1,fp8,fp8,0,0.28318080902099607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,2,128,1,float16,float16,0,0.2771231889724731
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,2,128,1,float16,fp8,0,0.2855295896530151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,4,128,1,float16,float16,0,0.01419679969549179
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,2,128,1,fp8,fp8,0,0.28619520664215087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,24,8,128,1,float16,fp8,0,0.029102399945259094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,4,128,1,float16,float16,0,0.29033598899841306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,1,128,1,fp8,fp8,0,0.013967999815940857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,4,128,1,fp8,fp8,0,0.29723520278930665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,2,128,1,float16,fp8,0,0.014000000059604644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,8,128,1,float16,float16,0,0.32084639072418214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,24,8,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,24,128,1,float16,fp8,0,0.20917439460754395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,24,128,1,float16,float16,0,0.23995039463043213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,8,128,1,fp8,fp8,0,0.31959359645843505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,8,128,1,float16,fp8,0,0.3198479890823364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,24,128,1,fp8,fp8,0,0.2086400032043457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,1,128,1,float16,float16,0,0.14327839612960816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,1,128,1,float16,fp8,0,0.14880319833755493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,1,128,1,fp8,fp8,0,0.1482640027999878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,2,128,1,float16,float16,0,0.14504319429397583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,2,128,1,float16,fp8,0,0.1507904052734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,2,128,1,fp8,fp8,0,0.1505136013031006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,4,128,1,float16,float16,0,0.1533679962158203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,4,128,1,float16,fp8,0,0.15539519786834716
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,4,128,1,fp8,fp8,0,0.15567840337753297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,8,128,1,float16,float16,0,0.16715840101242066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,8,128,1,fp8,fp8,0,0.16779520511627197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,24,128,1,float16,fp8,0,0.10918079614639283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,1,128,1,float16,float16,0,0.07933440208435058
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,24,128,1,fp8,fp8,0,0.10947200059890747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,24,128,1,float16,float16,0,0.1287008047103882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,1,128,1,float16,fp8,0,0.07918559908866882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,1,128,1,fp8,fp8,0,0.07952319979667663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,2,128,1,float16,fp8,0,0.08019840121269226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,2,128,1,float16,float16,0,0.08072479963302612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,2,128,1,fp8,fp8,0,0.08039199709892272
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,4,128,1,fp8,fp8,0,0.08278560042381286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,4,128,1,float16,fp8,0,0.08260800242424012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,8,128,1,float16,float16,0,0.09120159745216369
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,8,128,1,fp8,fp8,0,0.08872799873352051
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,24,128,1,float16,fp8,0,0.058638399839401244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,24,128,1,fp8,fp8,0,0.058657598495483396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,1,128,1,float16,fp8,0,0.04059999883174896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,1,128,1,fp8,fp8,0,0.040329599380493165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,24,8,128,1,float16,fp8,0,0.014619199931621552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,2,128,1,float16,float16,0,0.045844799280166625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,2,128,1,float16,fp8,0,0.04189760088920593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,4,128,1,float16,float16,0,0.04768480062484741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,24,4,128,1,float16,fp8,0,0.296729588508606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,4,128,1,fp8,fp8,0,0.043992000818252566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,8,128,1,float16,float16,0,0.05204480290412903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,8,128,1,float16,fp8,0,0.04766559898853302
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,8,128,1,fp8,fp8,0,0.047700798511505126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,24,128,1,float16,float16,0,0.039110401272773744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,24,128,1,float16,fp8,0,0.0298335999250412
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,24,128,1,fp8,fp8,0,0.029736000299453735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,1,128,1,float16,float16,0,0.022073599696159362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,1,128,1,float16,fp8,0,0.02280000001192093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,24,8,128,1,float16,fp8,0,0.16833120584487915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,1,128,1,fp8,fp8,0,0.02279680073261261
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,2,128,1,float16,float16,0,0.022540800273418427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,2,128,1,float16,fp8,0,0.022937600314617158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,2,128,1,fp8,fp8,0,0.023052799701690673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,4,128,1,float16,float16,0,0.08402400016784668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,24,8,128,1,float16,fp8,0,0.08840960264205933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,4,128,1,float16,float16,0,0.02335519939661026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,24,128,1,float16,float16,0,0.07060959935188293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,8,128,1,float16,float16,0,0.027137601375579835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,4,128,1,fp8,fp8,0,0.023214399814605713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,1,128,1,float16,float16,0,0.04392800033092499
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,8,128,1,float16,fp8,0,0.02393600046634674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,8,128,1,fp8,fp8,0,0.024120000004768372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,24,128,1,float16,fp8,0,0.019556799530982973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,24,128,1,fp8,fp8,0,0.019283199310302736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,1,128,1,float16,fp8,0,0.01780959963798523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,1,128,1,fp8,fp8,0,0.01765120029449463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,2,128,1,float16,float16,0,0.017455999553203583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,2,128,1,float16,fp8,0,0.017636799812316896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,2,128,1,fp8,fp8,0,0.01777759939432144
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,4,128,1,float16,float16,0,0.017505599558353423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,2,128,1,fp8,fp8,0,0.04153279960155487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,4,128,1,fp8,fp8,0,0.0177824005484581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,8,128,1,float16,fp8,0,0.017961600422859193
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,8,128,1,float16,float16,0,0.018270400166511536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,8,128,1,fp8,fp8,0,0.018063999712467194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,24,128,1,float16,float16,0,0.01436000019311905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,24,128,1,float16,fp8,0,0.013488000631332398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,24,128,1,fp8,fp8,0,0.013678400218486786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,1,128,1,float16,float16,0,0.012455999851226807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,1,128,1,float16,fp8,0,0.01255359947681427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,1,128,1,fp8,fp8,0,0.012980799376964568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,2,128,1,float16,float16,0,0.012342400103807449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,2,128,1,float16,fp8,0,0.012798400223255157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,2,128,1,fp8,fp8,0,0.012591999769210816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,4,128,1,float16,float16,0,0.012582400441169738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,4,128,1,float16,fp8,0,0.012804800271987915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,4,128,1,fp8,fp8,0,0.01292479932308197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,8,128,1,float16,float16,0,0.012836800515651703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,8,128,1,float16,fp8,0,0.012880000472068786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,24,8,128,1,fp8,fp8,0,0.013129599392414093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,24,128,1,float16,float16,0,0.013208000361919403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,24,128,1,float16,fp8,0,0.0131632000207901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,24,128,1,fp8,fp8,0,0.01308799982070923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,1,128,1,float16,float16,0,0.012267199903726577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,1,128,1,float16,fp8,0,0.012516799569129943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,1,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,2,128,1,float16,float16,0,0.012044800072908401
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,2,128,1,float16,fp8,0,0.012516799569129943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,2,128,1,fp8,fp8,0,0.012468799948692322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,4,128,1,float16,float16,0,0.012398400157690049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,4,128,1,float16,fp8,0,0.012294399738311767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,4,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,8,128,1,float16,float16,0,0.012409599870443344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,8,128,1,float16,fp8,0,0.012731200456619263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,24,8,128,1,fp8,fp8,0,0.012444800138473511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,24,128,1,float16,float16,0,0.012647999823093415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,24,128,1,float16,fp8,0,0.01279519945383072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,1,128,1,float16,float16,0,0.012078399956226348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,1,128,1,float16,fp8,0,0.012321600317955017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,24,4,128,1,float16,fp8,0,0.02319840043783188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,24,128,1,float16,float16,0,0.02170239984989166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,2,128,1,float16,float16,0,0.011694400012493134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,1,128,1,float16,float16,0,0.017228800058364867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,2,128,1,float16,fp8,0,0.012244799733161926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,4,128,1,float16,fp8,0,0.012139199674129486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,4,128,1,float16,float16,0,0.012051200121641159
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,4,128,1,fp8,fp8,0,0.012104000151157378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,8,128,1,float16,float16,0,0.012252800166606903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,8,128,1,float16,fp8,0,0.012291199713945388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,8,128,1,fp8,fp8,0,0.012478400021791458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,1,128,1,float16,float16,0,0.14143840074539185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,1,128,1,float16,fp8,0,0.1494639992713928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,24,4,128,1,float16,fp8,0,0.04433279931545257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,1,128,1,fp8,fp8,0,0.14635679721832276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,2,128,1,float16,float16,0,0.14683519601821898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,2,128,1,float16,fp8,0,0.15047359466552734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,2,128,1,fp8,fp8,0,0.14882400035858154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,4,128,1,float16,float16,0,0.15447360277175903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,4,128,1,float16,fp8,0,0.15546720027923583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,4,128,1,fp8,fp8,0,0.1545215964317322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,8,128,1,float16,float16,0,0.19705599546432495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,8,128,1,float16,fp8,0,0.19736959934234619
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,24,8,128,1,fp8,fp8,0,0.19551680088043213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,24,128,1,float16,float16,0,0.1484112024307251
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,24,128,1,float16,fp8,0,0.11677919626235962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,24,128,1,fp8,fp8,0,0.1162608027458191
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,1,128,1,float16,float16,0,0.07848479747772216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,1,128,1,float16,fp8,0,0.08039519786834717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,1,128,1,fp8,fp8,0,0.0796064019203186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,2,128,1,float16,float16,0,0.08070560097694397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,2,128,1,float16,fp8,0,0.08096799850463868
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,2,128,1,fp8,fp8,0,0.08054400086402894
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,4,128,1,float16,float16,0,0.08492799997329711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,4,128,1,float16,fp8,0,0.08290560245513916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,4,128,1,fp8,fp8,0,0.08265280127525329
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,8,128,1,float16,fp8,0,0.10414079427719117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,24,128,1,fp8,fp8,0,0.01279519945383072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,24,128,1,float16,float16,0,0.08036959767341614
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,24,128,1,float16,fp8,0,0.06321759819984436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,1,128,1,float16,float16,0,0.044607999920845035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,24,128,1,fp8,fp8,0,0.06319040060043335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,1,128,1,float16,fp8,0,0.04021919965744018
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,1,128,1,fp8,fp8,0,0.012436799705028534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,1,128,1,fp8,fp8,0,0.040571200847625735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,2,128,1,float16,float16,0,0.045819199085235594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,2,128,1,float16,fp8,0,0.04184480011463165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,4,128,1,float16,float16,0,0.04782240092754364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,24,2,128,1,fp8,fp8,0,0.012081599980592727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,4,128,1,fp8,fp8,0,0.044207999110221864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,8,128,1,float16,float16,0,0.0585968017578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,8,128,1,float16,fp8,0,0.05570719838142395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,8,128,1,fp8,fp8,0,0.05565599799156189
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,24,4,128,1,float16,fp8,0,0.017868800461292265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,24,128,1,float16,float16,0,0.043880000710487366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,24,128,1,float16,fp8,0,0.03157599866390228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,24,128,1,fp8,fp8,0,0.031070399284362792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,1,128,1,float16,float16,0,0.022303999960422517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,1,128,1,float16,fp8,0,0.023470400273799895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,1,128,1,fp8,fp8,0,0.023270399868488313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,2,128,1,float16,float16,0,0.022574399411678315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,2,128,1,fp8,fp8,0,0.023313599824905395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,4,128,1,float16,float16,0,0.02327519953250885
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,4,128,1,float16,fp8,0,0.02353599965572357
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,4,128,1,fp8,fp8,0,0.023470400273799895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,8,128,1,float16,float16,0,0.030251199007034303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,8,128,1,float16,fp8,0,0.028148800134658813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,8,128,1,fp8,fp8,0,0.028275200724601747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,24,128,1,float16,float16,0,0.020606400072574617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,24,128,1,float16,fp8,0,0.01727840006351471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,24,128,1,fp8,fp8,0,0.017422400414943695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,1,128,1,float16,float16,0,0.016752000153064727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,1,128,1,float16,fp8,0,0.017584000527858735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,1,128,1,fp8,fp8,0,0.01736319959163666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,2,128,1,float16,float16,0,0.017110399901866913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,2,128,1,float16,fp8,0,0.01758880019187927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,2,128,1,fp8,fp8,0,0.0176816001534462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,4,128,1,float16,float16,0,0.01736160069704056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,4,128,1,float16,fp8,0,0.01769919991493225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,4,128,1,fp8,fp8,0,0.017731200158596038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,8,128,1,float16,float16,0,0.017508800327777862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,8,128,1,float16,fp8,0,0.017772799730300902
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,24,128,1,float16,float16,0,0.015356799960136414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,24,128,1,float16,fp8,0,0.013444800674915314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,24,128,1,fp8,fp8,0,0.013449600338935852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,1,128,1,float16,float16,0,0.012371200323104858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,1,128,1,float16,fp8,0,0.012615999579429627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,8,128,1,float16,float16,0,0.10566560029983521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,24,8,128,1,fp8,fp8,0,0.10462720394134521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,2,128,1,float16,float16,0,0.01268640011548996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,2,128,1,fp8,fp8,0,0.012963199615478515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,4,128,1,float16,float16,0,0.012734399735927581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,4,128,1,float16,fp8,0,0.013078400492668152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,4,128,1,fp8,fp8,0,0.012929600477218629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,2,128,1,fp8,fp8,0,0.04179680049419403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,8,128,1,float16,float16,0,0.012960000336170197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,24,4,128,1,float16,fp8,0,0.04422720074653626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,8,128,1,fp8,fp8,0,0.0131632000207901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,24,128,1,float16,float16,0,0.011390399932861329
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,24,128,1,float16,fp8,0,0.010886400192975997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,24,128,1,fp8,fp8,0,0.010809600353240967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,1,128,1,float16,float16,0,0.012004800140857697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,1,128,1,float16,fp8,0,0.012222400307655335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,2,128,1,float16,float16,0,0.011998400092124939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,24,2,128,1,float16,fp8,0,0.0234607994556427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,2,128,1,fp8,fp8,0,0.012521600723266602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,4,128,1,float16,float16,0,0.012164799869060517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,4,128,1,float16,fp8,0,0.012443199753761292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,4,128,1,fp8,fp8,0,0.012388800084590913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,8,128,1,float16,float16,0,0.012289600074291229
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,8,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,8,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,24,128,1,float16,float16,0,0.010702399909496308
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,24,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,24,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,1,128,1,float16,float16,0,0.011827199906110763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,1,128,1,float16,fp8,0,0.01226079985499382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,1,128,1,fp8,fp8,0,0.012251199781894683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,2,128,1,float16,float16,0,0.012041600048542022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,2,128,1,fp8,fp8,0,0.012265600264072418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,4,128,1,float16,float16,0,0.011934400349855424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,4,128,1,float16,fp8,0,0.01239679977297783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,1,128,1,fp8,fp8,0,0.012915199995040894
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,4,128,1,fp8,fp8,0,0.012491200119256973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,8,128,1,float16,float16,0,0.012011200189590454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,8,128,1,fp8,fp8,0,0.012294399738311767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,24,128,1,float16,float16,0,0.010660800337791442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,24,128,1,float16,fp8,0,0.010331200063228607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,24,128,1,fp8,fp8,0,0.010288000106811523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,1,128,1,float16,float16,0,0.011497599631547928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,1,128,1,float16,fp8,0,0.011766400188207626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,8,128,1,float16,fp8,0,0.01311360001564026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,1,128,1,fp8,fp8,0,0.01178399994969368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,2,128,1,float16,float16,0,0.011907199770212174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,2,128,1,float16,fp8,0,0.0119439996778965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,2,128,1,fp8,fp8,0,0.012088000029325485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,4,128,1,float16,float16,0,0.011753600090742111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,1,128,1,fp8,fp8,0,0.012356799840927125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,8,128,1,float16,float16,0,0.011843200027942657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,8,128,1,float16,fp8,0,0.011886399984359742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,24,2,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,8,128,1,fp8,fp8,0,0.012055999785661697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,1,128,1,float16,float16,0,0.07870079874992371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,1,128,1,float16,fp8,0,0.08073599934577942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,1,128,1,fp8,fp8,0,0.0807536005973816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,2,128,1,float16,float16,0,0.08012959957122803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,2,128,1,float16,fp8,0,0.08080160021781921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,2,128,1,fp8,fp8,0,0.08099359869956971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,24,8,128,1,fp8,fp8,0,0.017772799730300902
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,4,128,1,float16,float16,0,0.09805759787559509
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,2,128,1,float16,fp8,0,0.01242239996790886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,4,128,1,float16,fp8,0,0.10036799907684327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,4,128,1,fp8,fp8,0,0.09962720274925232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,8,128,1,float16,fp8,0,0.10605759620666504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,8,128,1,fp8,fp8,0,0.10517120361328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,24,8,128,1,float16,float16,0,0.10782879590988159
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,24,2,128,1,float16,fp8,0,0.012918399274349212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,24,128,1,float16,float16,0,0.10883840322494506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,24,128,1,float16,fp8,0,0.09365919828414918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,1,128,1,float16,float16,0,0.04532159864902496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,1,128,1,float16,fp8,0,0.04112800061702728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,1,128,1,fp8,fp8,0,0.04060960114002228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,2,128,1,float16,float16,0,0.046587198972702026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,2,128,1,float16,fp8,0,0.04217120110988617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,2,128,1,fp8,fp8,0,0.04191359877586365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,4,128,1,float16,float16,0,0.05568479895591736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,4,128,1,float16,fp8,0,0.053358399868011476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,4,128,1,fp8,fp8,0,0.05312479734420776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,8,128,1,float16,float16,0,0.06028320193290711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,8,128,1,float16,fp8,0,0.05713919997215271
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,8,128,1,fp8,fp8,0,0.056897598505020144
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,24,128,1,float16,float16,0,0.05917919874191284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,4,128,1,fp8,fp8,0,0.012035199999809265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,24,128,1,float16,fp8,0,0.047875198721885684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,1,128,1,float16,float16,0,0.02265920042991638
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,1,128,1,fp8,fp8,0,0.023740799725055696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,2,128,1,float16,float16,0,0.02292319983243942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,2,128,1,float16,fp8,0,0.023716799914836884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,2,128,1,fp8,fp8,0,0.023636800050735474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,4,128,1,float16,float16,0,0.027233600616455078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,4,128,1,float16,fp8,0,0.028014400601387025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,4,128,1,fp8,fp8,0,0.02800160050392151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,8,128,1,float16,float16,0,0.030096000432968138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,8,128,1,float16,fp8,0,0.028491199016571045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,8,128,1,fp8,fp8,0,0.02834399938583374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,24,128,1,float16,fp8,0,0.025766399502754212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,24,128,1,fp8,fp8,0,0.025824001431465148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,1,128,1,float16,float16,0,0.017452800273895265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,1,128,1,float16,fp8,0,0.017961600422859193
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,1,128,1,fp8,fp8,0,0.017951999604701997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,2,128,1,float16,float16,0,0.017532800137996674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,2,128,1,float16,fp8,0,0.01790879964828491
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,2,128,1,fp8,fp8,0,0.01796640008687973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,4,128,1,float16,float16,0,0.017476800084114074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,4,128,1,float16,fp8,0,0.018028800189495087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,24,8,128,1,float16,fp8,0,0.01241919994354248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,4,128,1,fp8,fp8,0,0.018059200048446654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,8,128,1,float16,fp8,0,0.018182399868965148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,24,24,128,1,fp8,fp8,0,0.09281119704246521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,8,128,1,fp8,fp8,0,0.01820160001516342
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,24,128,1,float16,fp8,0,0.01648000031709671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,24,128,1,fp8,fp8,0,0.01650879979133606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,1,128,1,float16,float16,0,0.012513600289821625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,1,128,1,float16,fp8,0,0.012598399817943574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,1,128,1,fp8,fp8,0,0.012828800082206725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,24,4,128,1,float16,fp8,0,0.012089599668979645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,24,128,1,fp8,fp8,0,0.04817599952220917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,24,1,128,1,float16,fp8,0,0.023558400571346283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,2,128,1,fp8,fp8,0,0.012870399653911591
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,4,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,4,128,1,float16,fp8,0,0.012943999469280243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,4,128,1,fp8,fp8,0,0.012673600018024445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,8,128,1,float16,float16,0,0.013048000633716583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,8,128,1,float16,fp8,0,0.012753599882125854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,24,128,1,float16,float16,0,0.013755199313163758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,24,128,1,float16,float16,0,0.028622400760650635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,24,128,1,float16,fp8,0,0.01311199963092804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,1,128,1,float16,float16,0,0.012100800126791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,1,128,1,float16,fp8,0,0.012457600235939026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,1,128,1,fp8,fp8,0,0.012328000366687774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,2,128,1,float16,float16,0,0.012091200053691863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,2,128,1,float16,fp8,0,0.012249600142240524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,2,128,1,fp8,fp8,0,0.012435200065374375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,4,128,1,float16,float16,0,0.012135999649763108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,4,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,4,128,1,fp8,fp8,0,0.012460800260305405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,8,128,1,float16,float16,0,0.012216000258922577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,8,128,1,float16,fp8,0,0.012441600114107132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,8,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,24,128,1,float16,float16,0,0.01091040000319481
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,24,128,1,float16,fp8,0,0.01064319983124733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,24,8,128,1,float16,float16,0,0.01807519942522049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,24,128,1,fp8,fp8,0,0.010657600313425063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,1,128,1,float16,float16,0,0.011740799993276596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,1,128,1,float16,fp8,0,0.012139199674129486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,2,128,1,float16,fp8,0,0.012124799937009812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,2,128,1,fp8,fp8,0,0.012115199863910676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,4,128,1,float16,float16,0,0.011772800236940384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,4,128,1,float16,fp8,0,0.012332800030708312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,2,128,1,float16,float16,0,0.012427199631929398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,4,128,1,fp8,fp8,0,0.012352000176906585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,2,128,1,float16,fp8,0,0.01300159990787506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,8,128,1,float16,fp8,0,0.012217599898576736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,8,128,1,fp8,fp8,0,0.012060800194740295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,24,128,1,float16,float16,0,0.010529600083827972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,24,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,24,128,1,fp8,fp8,0,0.010603199899196624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,1,128,1,float16,float16,0,0.011963199824094772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,24,24,128,1,fp8,fp8,0,0.013115200400352477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,1,128,1,fp8,fp8,0,0.012139199674129486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,2,128,1,float16,float16,0,0.011771199852228164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,2,128,1,float16,fp8,0,0.011876799911260606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,2,128,1,fp8,fp8,0,0.011769600212574005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,4,128,1,float16,float16,0,0.011497599631547928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,4,128,1,float16,fp8,0,0.012036799639463424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,4,128,1,fp8,fp8,0,0.01188800036907196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,8,128,1,float16,float16,0,0.011383999884128571
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,8,128,1,float16,fp8,0,0.011912000179290772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,8,128,1,fp8,fp8,0,0.012009599804878235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,24,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,24,128,1,float16,fp8,0,0.010331200063228607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,24,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,1,128,1,float16,float16,0,0.011481600254774094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,1,128,1,float16,fp8,0,0.011710400134325028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,1,128,1,fp8,fp8,0,0.011667200177907944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,24,128,1,float16,float16,0,0.01780959963798523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,2,128,1,float16,float16,0,0.011385600268840789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,2,128,1,float16,fp8,0,0.011840000003576278
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,2,128,1,fp8,fp8,0,0.011721599847078323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,4,128,1,float16,float16,0,0.011478400230407715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,4,128,1,float16,fp8,0,0.011881600320339202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,4,128,1,fp8,fp8,0,0.0118367999792099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,8,128,1,float16,float16,0,0.01141119971871376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,8,128,1,float16,fp8,0,0.011694400012493134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,1,128,1,float16,float16,0,0.0456959992647171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,8,128,1,float16,float16,0,0.01197120025753975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,1,128,1,float16,fp8,0,0.04255360066890716
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,1,128,1,fp8,fp8,0,0.04266240000724793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,2,128,1,float16,float16,0,0.053862398862838744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,2,128,1,fp8,fp8,0,0.05239199995994568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,24,8,128,1,fp8,fp8,0,0.013084800541400909
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,4,128,1,float16,float16,0,0.056435197591781616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,4,128,1,float16,fp8,0,0.05497599840164184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,24,1,128,1,float16,fp8,0,0.012214399874210358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,4,128,1,fp8,fp8,0,0.05446400046348572
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,8,128,1,float16,float16,0,0.07234240174293519
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,8,128,1,float16,fp8,0,0.0670144021511078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,8,128,1,fp8,fp8,0,0.06696799993515015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,24,128,1,float16,float16,0,0.09100480079650879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,24,128,1,fp8,fp8,0,0.0798367977142334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,1,128,1,float16,float16,0,0.02343519926071167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,1,128,1,float16,fp8,0,0.025139200687408447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,1,128,1,fp8,fp8,0,0.024648000299930573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,2,128,1,float16,fp8,0,0.0286655992269516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,2,128,1,float16,float16,0,0.02725760042667389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,2,128,1,fp8,fp8,0,0.028787198662757873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,4,128,1,float16,float16,0,0.027796798944473268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,4,128,1,float16,fp8,0,0.028889599442481994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,4,128,1,fp8,fp8,0,0.029300799965858458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,8,128,1,float16,float16,0,0.036427199840545654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,8,128,1,float16,fp8,0,0.03369599878787995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,8,128,1,fp8,fp8,0,0.03351680040359497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,24,128,1,float16,float16,0,0.045716801285743715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,24,128,1,float16,fp8,0,0.04219839870929718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,24,128,1,fp8,fp8,0,0.042268800735473636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,1,128,1,float16,float16,0,0.017552000284194947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,1,128,1,float16,fp8,0,0.018452799320220946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,1,128,1,fp8,fp8,0,0.018372799456119537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,2,128,1,float16,float16,0,0.017560000717639922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,2,128,1,float16,fp8,0,0.018265600502490997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,2,128,1,float16,float16,0,0.011767999827861786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,2,128,1,fp8,fp8,0,0.018460799753665925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,4,128,1,float16,fp8,0,0.01855359971523285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,4,128,1,fp8,fp8,0,0.018619200587272643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,8,128,1,float16,float16,0,0.021135999262332915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,8,128,1,float16,fp8,0,0.02054399996995926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,8,128,1,fp8,fp8,0,0.020294399559497835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,24,8,128,1,fp8,fp8,0,0.011772800236940384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,24,128,1,float16,fp8,0,0.024798400700092316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,24,128,1,fp8,fp8,0,0.024670399725437164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,1,128,1,float16,fp8,0,0.013236799836158752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,1,128,1,fp8,fp8,0,0.013142399489879608
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,2,128,1,float16,float16,0,0.013023999333381654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,2,128,1,fp8,fp8,0,0.01324319988489151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,4,128,1,float16,float16,0,0.012961600720882416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,4,128,1,float16,fp8,0,0.012996800243854523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,24,2,128,1,float16,fp8,0,0.05276319980621338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,4,128,1,fp8,fp8,0,0.013166399300098419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,8,128,1,float16,float16,0,0.01443679928779602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,8,128,1,float16,fp8,0,0.01363680064678192
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,8,128,1,fp8,fp8,0,0.013713599741458892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,24,128,1,float16,float16,0,0.01669600009918213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,24,128,1,float16,fp8,0,0.01594240069389343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,24,128,1,fp8,fp8,0,0.016011199355125426
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,1,128,1,float16,float16,0,0.011974400281906128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,1,128,1,float16,fp8,0,0.012598399817943574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,1,128,1,fp8,fp8,0,0.012297599762678146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,2,128,1,float16,float16,0,0.012356799840927125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,2,128,1,float16,fp8,0,0.01223360002040863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,2,128,1,fp8,fp8,0,0.01266079992055893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,24,24,128,1,float16,fp8,0,0.08045759797096252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,4,128,1,float16,float16,0,0.012006399780511856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,4,128,1,float16,fp8,0,0.012252800166606903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,4,128,1,fp8,fp8,0,0.012518399953842163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,8,128,1,float16,float16,0,0.010780800133943558
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,8,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,24,8,128,1,fp8,fp8,0,0.010622400045394897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,24,128,1,float16,float16,0,0.013089600205421447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,24,128,1,float16,fp8,0,0.012950399518013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,1,128,1,float16,float16,0,0.012123200297355651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,1,128,1,float16,fp8,0,0.012291199713945388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,1,128,1,fp8,fp8,0,0.012275200337171555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,2,128,1,float16,float16,0,0.011843200027942657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,2,128,1,float16,fp8,0,0.012303999811410903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,2,128,1,fp8,fp8,0,0.012203200161457062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,4,128,1,float16,float16,0,0.011959999799728394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,24,1,128,1,fp8,fp8,0,0.012425599992275238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,4,128,1,float16,fp8,0,0.012390399724245072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,24,4,128,1,float16,float16,0,0.0174687996506691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,4,128,1,fp8,fp8,0,0.012324800342321396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,8,128,1,float16,float16,0,0.011006399989128113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,8,128,1,float16,fp8,0,0.010038399696350097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,24,128,1,float16,float16,0,0.010579200088977813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,24,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,1,128,1,float16,float16,0,0.01180799975991249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,1,128,1,float16,float16,0,0.01279519945383072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,2,128,1,float16,fp8,0,0.01332319974899292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,2,128,1,float16,float16,0,0.011745599657297134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,2,128,1,float16,fp8,0,0.012057600170373916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,2,128,1,fp8,fp8,0,0.012049599736928939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,4,128,1,float16,float16,0,0.011577600240707397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,4,128,1,float16,fp8,0,0.011761599779129028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,4,128,1,fp8,fp8,0,0.01183359995484352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,8,128,1,float16,float16,0,0.010087999701499938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,8,128,1,float16,fp8,0,0.010239999741315842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,8,128,1,fp8,fp8,0,0.009984000027179718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,24,128,1,float16,float16,0,0.010595200210809707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,24,128,1,float16,fp8,0,0.010364799946546554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,24,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,1,128,1,float16,float16,0,0.011548800021409988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,1,128,1,float16,fp8,0,0.011851199716329575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,1,128,1,fp8,fp8,0,0.011948800086975098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,2,128,1,float16,float16,0,0.011854399740695954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,24,128,1,fp8,fp8,0,0.012720000743865967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,2,128,1,fp8,fp8,0,0.011963199824094772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,4,128,1,float16,float16,0,0.011483199894428253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,4,128,1,float16,fp8,0,0.011896000057458878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,4,128,1,fp8,fp8,0,0.011884800344705581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,8,128,1,float16,float16,0,0.00986879989504814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,8,128,1,float16,fp8,0,0.009540800005197525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,8,128,1,fp8,fp8,0,0.009465599805116654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,24,128,1,float16,float16,0,0.01022080034017563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,24,128,1,float16,fp8,0,0.010185600072145463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,24,128,1,fp8,fp8,0,0.010185600072145463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,1,128,1,float16,float16,0,0.011380799859762192
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,24,8,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,1,128,1,float16,fp8,0,0.011865600198507308
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,24,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,24,24,128,1,float16,float16,0,0.026256000995635985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,2,128,1,float16,fp8,0,0.011939200013875962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,1,128,1,float16,fp8,0,0.012092799693346024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,4,128,1,float16,float16,0,0.011342400312423706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,24,1,128,1,fp8,fp8,0,0.012142399698495865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,4,128,1,fp8,fp8,0,0.011619199812412263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,8,128,1,float16,float16,0,0.009838400036096573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,8,128,1,float16,fp8,0,0.009558399766683578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,8,128,1,fp8,fp8,0,0.009539200365543366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,1,128,1,float16,float16,0,0.01839040070772171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,1,128,1,float16,fp8,0,0.017668800055980684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,1,128,1,fp8,fp8,0,0.017958399653434754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,2,128,1,float16,float16,0,0.023262399435043334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,2,128,1,float16,fp8,0,0.02337760031223297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,2,128,1,fp8,fp8,0,0.023148800432682037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,4,128,1,float16,float16,0,0.03464959859848023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,4,128,1,float16,fp8,0,0.03427680134773255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,4,128,1,fp8,fp8,0,0.034564799070358275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,8,128,1,float16,float16,0,0.05598719716072083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,8,128,1,float16,fp8,0,0.0559391975402832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,24,8,128,1,fp8,fp8,0,0.05591520071029663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,24,128,1,float16,float16,0,0.07237600088119507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,24,128,1,float16,fp8,0,0.07181599736213684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,24,128,1,fp8,fp8,0,0.07187680006027222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,1,128,1,float16,float16,0,0.012614400684833526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,1,128,1,float16,fp8,0,0.01279039978981018
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,1,128,1,fp8,fp8,0,0.012563200294971466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,2,128,1,float16,float16,0,0.015563200414180755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,2,128,1,float16,fp8,0,0.015281599760055543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,2,128,1,fp8,fp8,0,0.015462400019168853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,4,128,1,float16,float16,0,0.02083680033683777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,4,128,1,float16,fp8,0,0.02120479941368103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,4,128,1,fp8,fp8,0,0.020582400262355804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,8,128,1,float16,float16,0,0.0324864000082016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,8,128,1,float16,fp8,0,0.03161279857158661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,24,128,1,float16,float16,0,0.04038400053977966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,24,128,1,float16,fp8,0,0.04063200056552887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,24,128,1,fp8,fp8,0,0.0404911994934082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,1,128,1,float16,fp8,0,0.011428800225257874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,1,128,1,fp8,fp8,0,0.011134400218725204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,2,128,1,float16,float16,0,0.01127839982509613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,2,128,1,float16,fp8,0,0.011300799995660782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,1,128,1,fp8,fp8,0,0.011926399916410447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,2,128,1,fp8,fp8,0,0.011575999855995178
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,4,128,1,float16,float16,0,0.014180800318717957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,2,128,1,fp8,fp8,0,0.0118928000330925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,4,128,1,float16,fp8,0,0.014193600416183472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,4,128,1,fp8,fp8,0,0.01414559930562973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,8,128,1,float16,float16,0,0.019948799908161164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,8,128,1,float16,fp8,0,0.01968639940023422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,8,128,1,fp8,fp8,0,0.019676800072193145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,24,128,1,float16,float16,0,0.024223999679088594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,24,128,1,float16,fp8,0,0.023937599360942842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,24,128,1,fp8,fp8,0,0.023814399540424348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,1,128,1,float16,float16,0,0.010684800148010255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,1,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,1,128,1,fp8,fp8,0,0.010715200006961823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,2,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,2,128,1,float16,fp8,0,0.010566399991512298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,2,128,1,fp8,fp8,0,0.010331200063228607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,4,128,1,float16,float16,0,0.010710400342941285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,4,128,1,float16,fp8,0,0.010684800148010255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,4,128,1,fp8,fp8,0,0.010644800215959548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,8,128,1,float16,float16,0,0.01327199935913086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,8,128,1,fp8,fp8,0,0.013225600123405457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,24,128,1,float16,float16,0,0.015807999670505522
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,24,128,1,float16,fp8,0,0.015587200224399567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,24,128,1,fp8,fp8,0,0.01541599929332733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,1,128,1,float16,float16,0,0.009935999661684037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,1,128,1,float16,fp8,0,0.009815999865531921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,1,128,1,fp8,fp8,0,0.009851200133562088
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,24,2,128,1,float16,fp8,0,0.012081599980592727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,2,128,1,float16,float16,0,0.009940800070762635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,2,128,1,fp8,fp8,0,0.009916800260543823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,24,8,128,1,fp8,fp8,0,0.03160960078239441
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,4,128,1,float16,float16,0,0.010116799920797347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,24,1,128,1,float16,float16,0,0.011129599809646607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,4,128,1,float16,fp8,0,0.009991999715566635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,4,128,1,fp8,fp8,0,0.009998399764299393
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,8,128,1,float16,float16,0,0.010252799838781357
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,8,128,1,float16,fp8,0,0.010094399750232696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,24,128,1,float16,float16,0,0.01292479932308197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,24,128,1,float16,fp8,0,0.012443199753761292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,24,128,1,fp8,fp8,0,0.012724800407886505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,1,128,1,float16,float16,0,0.009763199836015701
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,1,128,1,float16,fp8,0,0.00945120006799698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,1,128,1,fp8,fp8,0,0.009593600034713745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,2,128,1,float16,float16,0,0.009814400225877762
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,2,128,1,float16,fp8,0,0.009440000355243682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,2,128,1,fp8,fp8,0,0.009678400307893752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,4,128,1,float16,float16,0,0.009889599680900574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,4,128,1,float16,fp8,0,0.009702400118112565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,4,128,1,fp8,fp8,0,0.009907200187444686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,8,128,1,float16,float16,0,0.009944000095129014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,8,128,1,float16,fp8,0,0.009920000284910201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,24,8,128,1,fp8,fp8,0,0.00989760011434555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,24,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,24,128,1,float16,fp8,0,0.01053759977221489
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,24,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,1,128,1,float16,float16,0,0.009588800370693207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,1,128,1,float16,fp8,0,0.009332799911499023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,1,128,1,fp8,fp8,0,0.009415999799966813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,2,128,1,float16,float16,0,0.009471999853849411
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,4,128,1,float16,fp8,0,0.011774399876594543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,2,128,1,float16,fp8,0,0.009515199810266495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,2,128,1,fp8,fp8,0,0.009353599697351455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,4,128,1,float16,float16,0,0.009700799733400345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,4,128,1,fp8,fp8,0,0.009286399930715561
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,8,128,1,float16,float16,0,0.009764800220727921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,8,128,1,float16,fp8,0,0.009563200175762177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,8,128,1,fp8,fp8,0,0.009737599641084671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,24,128,1,float16,float16,0,0.010107199847698211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,24,128,1,float16,fp8,0,0.010139200091361999
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,24,128,1,fp8,fp8,0,0.010209599882364273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,1,128,1,float16,float16,0,0.009513600170612336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,1,128,1,fp8,fp8,0,0.009239999949932099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,2,128,1,float16,float16,0,0.00947519987821579
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,2,128,1,float16,fp8,0,0.009323199838399887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,2,128,1,fp8,fp8,0,0.00939679965376854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,24,8,128,1,float16,fp8,0,0.013238400220870972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,4,128,1,float16,fp8,0,0.009764800220727921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,4,128,1,fp8,fp8,0,0.00974079966545105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,8,128,1,float16,float16,0,0.00989760011434555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,8,128,1,float16,fp8,0,0.009763199836015701
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,8,128,1,fp8,fp8,0,0.009612800180912017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,24,128,1,float16,float16,0,0.010238400101661682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,24,128,1,float16,fp8,0,0.01003199964761734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,24,128,1,fp8,fp8,0,0.010065600275993347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,2,128,1,float16,fp8,0,0.010215999931097031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,1,128,1,float16,float16,0,0.009596800059080124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,1,128,1,fp8,fp8,0,0.009467200189828873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,1,128,1,float16,fp8,0,0.009510400146245957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,2,128,1,float16,float16,0,0.009734400361776353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,2,128,1,float16,fp8,0,0.00936800017952919
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,2,128,1,fp8,fp8,0,0.009344000369310379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,4,128,1,float16,float16,0,0.009623999893665313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,4,128,1,float16,fp8,0,0.009297599643468856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,4,128,1,fp8,fp8,0,0.009256000071763993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,8,128,1,float16,fp8,0,0.00966399982571602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,8,128,1,fp8,fp8,0,0.009625600278377533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,24,2,128,1,float16,float16,0,0.011508800089359283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,24,4,128,1,float16,fp8,0,0.009431999921798707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,1,128,1,float16,fp8,0,0.009344000369310379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,24,4,128,1,float16,float16,0,0.009824000298976898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,24,8,128,1,fp8,fp8,0,0.010267200320959092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,24,8,128,1,float16,float16,0,0.009736000001430512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,1,128,1,float16,fp8,0,11.95789794921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,1,128,1,fp8,fp8,0,11.768592071533202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,2,128,1,float16,fp8,0,12.062840270996094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,1,128,1,float16,float16,0,14.91425018310547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,2,128,1,fp8,fp8,0,11.867726135253907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,2,128,1,float16,float16,0,15.397859191894531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,4,128,1,float16,float16,0,15.088388061523437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,4,128,1,float16,fp8,0,12.095916748046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,16,128,1,float16,float16,0,7.8089042663574215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,16,128,1,float16,fp8,0,6.207662582397461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,4,128,1,fp8,fp8,0,12.048136138916016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,16,128,1,fp8,fp8,0,6.0733695983886715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,8,128,1,float16,fp8,0,12.189425659179687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,8,128,1,fp8,fp8,0,11.964588928222657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,16,8,128,1,float16,float16,0,15.444917297363281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,1,128,1,float16,float16,0,7.571673583984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,1,128,1,fp8,fp8,0,6.01770248413086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,1,128,1,float16,fp8,0,6.005003356933594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,2,128,1,float16,fp8,0,5.895707321166992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,2,128,1,float16,float16,0,7.571784210205078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,2,128,1,fp8,fp8,0,5.993212890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,4,128,1,float16,fp8,0,6.064332962036133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,4,128,1,float16,float16,0,7.902027130126953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,4,128,1,fp8,fp8,0,6.0996654510498045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,8,128,1,float16,fp8,0,6.02020492553711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,16,128,1,float16,fp8,0,3.105232048034668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,16,128,1,float16,float16,0,4.474371337890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,8,128,1,float16,float16,0,8.029673767089843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,16,128,1,fp8,fp8,0,3.29713134765625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,1,128,1,float16,fp8,0,2.957342338562012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,1,128,1,float16,float16,0,3.977676773071289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,1,128,1,fp8,fp8,0,3.08636474609375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,2,128,1,float16,fp8,0,3.1646671295166016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,2,128,1,float16,float16,0,4.290169525146484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,2,128,1,fp8,fp8,0,3.3334815979003904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,16,8,128,1,fp8,fp8,0,6.012155151367187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,4,128,1,float16,float16,0,3.9160129547119142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,4,128,1,float16,fp8,0,2.9975183486938475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,4,128,1,fp8,fp8,0,3.0638336181640624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,8,128,1,float16,fp8,0,2.9653663635253906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,16,128,1,float16,fp8,0,1.498591995239258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,16,128,1,float16,float16,0,1.8425743103027343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,8,128,1,float16,float16,0,4.004216003417969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,16,128,1,fp8,fp8,0,1.7819904327392577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,1,128,1,float16,fp8,0,1.4695311546325684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,16,8,128,1,fp8,fp8,0,3.400406265258789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,1,128,1,fp8,fp8,0,1.504371166229248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,2,128,1,float16,float16,0,1.6999471664428711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,2,128,1,fp8,fp8,0,1.6346960067749023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,4,128,1,float16,float16,0,1.7788288116455078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,4,128,1,float16,fp8,0,1.4652480125427245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,4,128,1,fp8,fp8,0,1.497214412689209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,8,128,1,float16,float16,0,1.7991775512695312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,8,128,1,float16,fp8,0,1.5876640319824218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,8,128,1,fp8,fp8,0,1.502239990234375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,1,128,1,float16,float16,0,2.0157392501831053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,16,2,128,1,float16,fp8,0,1.6525455474853517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,1,128,1,float16,fp8,0,6.705340576171875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,1,128,1,fp8,fp8,0,6.848356628417969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,2,128,1,float16,fp8,0,6.726372528076172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,2,128,1,fp8,fp8,0,6.774422454833984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,1,128,1,float16,float16,0,8.603900909423828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,2,128,1,float16,float16,0,8.665665435791016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,4,128,1,float16,fp8,0,6.842937469482422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,4,128,1,float16,float16,0,8.76580810546875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,16,128,1,float16,float16,0,4.465663909912109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,16,128,1,float16,fp8,0,3.597492980957031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,4,128,1,fp8,fp8,0,6.783220672607422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,16,128,1,fp8,fp8,0,3.5337760925292967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,8,128,1,float16,fp8,0,6.818004608154297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,8,128,1,fp8,fp8,0,6.8814849853515625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,16,8,128,1,float16,float16,0,8.936087799072265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,1,128,1,float16,fp8,0,3.4464832305908204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,2,128,1,float16,fp8,0,3.4166385650634767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,2,128,1,float16,float16,0,4.152926254272461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,2,128,1,fp8,fp8,0,3.3300479888916015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,1,128,1,float16,float16,0,4.274391937255859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,4,128,1,float16,fp8,0,3.540768051147461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,4,128,1,float16,float16,0,4.238929748535156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,4,128,1,fp8,fp8,0,3.378224182128906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,1,128,1,fp8,fp8,0,3.372051239013672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,16,128,1,float16,float16,0,2.1453216552734373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,16,128,1,float16,fp8,0,1.7715343475341796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,8,128,1,float16,fp8,0,3.5083728790283204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,8,128,1,float16,float16,0,4.429963302612305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,16,128,1,fp8,fp8,0,1.9425552368164063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,16,8,128,1,fp8,fp8,0,3.399740982055664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,1,128,1,float16,fp8,0,1.6485776901245117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,1,128,1,float16,float16,0,2.039523124694824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,2,128,1,float16,fp8,0,1.6848415374755858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,1,128,1,fp8,fp8,0,1.8808864593505858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,2,128,1,float16,float16,0,2.0409807205200194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,2,128,1,fp8,fp8,0,1.9217056274414062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,4,128,1,float16,float16,0,2.0582399368286133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,4,128,1,float16,fp8,0,1.6686719894409179
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,4,128,1,fp8,fp8,0,1.649068832397461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,8,128,1,float16,float16,0,2.2436784744262694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,16,128,1,float16,float16,0,1.1816479682922363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,16,128,1,fp8,fp8,0,0.9900431632995605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,8,128,1,float16,fp8,0,1.7207311630249023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,1,128,1,float16,float16,0,0.9663663864135742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,16,8,128,1,fp8,fp8,0,1.7177263259887696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,1,128,1,float16,fp8,0,0.9760080337524414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,2,128,1,float16,float16,0,0.9876288414001465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,2,128,1,float16,fp8,0,0.8933535575866699
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,4,128,1,float16,float16,0,0.9825872421264649
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,4,128,1,float16,fp8,0,0.9234047889709472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,4,128,1,fp8,fp8,0,0.9040160179138184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,8,128,1,float16,float16,0,1.012166404724121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,8,128,1,float16,fp8,0,0.9452783584594726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,8,128,1,fp8,fp8,0,0.8579584121704101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,16,128,1,float16,fp8,0,0.9326687812805176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,1,128,1,fp8,fp8,0,0.9300191879272461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,16,2,128,1,fp8,fp8,0,0.8977904319763184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,1,128,1,fp8,fp8,0,4.771766281127929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,1,128,1,float16,fp8,0,4.97192497253418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,1,128,1,float16,float16,0,6.22285270690918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,2,128,1,float16,fp8,0,4.769177627563477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,2,128,1,float16,float16,0,6.2770225524902346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,2,128,1,fp8,fp8,0,4.7577777862548825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,4,128,1,float16,float16,0,5.780220794677734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,4,128,1,float16,fp8,0,4.726959991455078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,4,128,1,fp8,fp8,0,4.832024002075196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,16,128,1,float16,fp8,0,2.503976058959961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,16,128,1,float16,float16,0,3.068272018432617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,16,128,1,fp8,fp8,0,2.5586191177368165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,8,128,1,fp8,fp8,0,4.869364929199219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,8,128,1,float16,float16,0,6.306876754760742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,16,8,128,1,float16,fp8,0,5.025339126586914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,1,128,1,float16,fp8,0,2.356867218017578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,1,128,1,fp8,fp8,0,2.4075088500976562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,1,128,1,float16,float16,0,2.8700767517089845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,2,128,1,float16,float16,0,2.9725744247436525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,2,128,1,float16,fp8,0,2.3577184677124023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,2,128,1,fp8,fp8,0,2.4011375427246096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,4,128,1,float16,fp8,0,2.3985807418823244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,4,128,1,float16,float16,0,2.998566436767578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,4,128,1,fp8,fp8,0,2.393078422546387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,8,128,1,float16,fp8,0,2.5179136276245115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,16,128,1,float16,float16,0,1.4781599998474122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,16,128,1,fp8,fp8,0,1.246951961517334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,8,128,1,float16,float16,0,3.0474544525146485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,16,8,128,1,fp8,fp8,0,2.5349920272827147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,1,128,1,float16,fp8,0,1.1835536003112792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,1,128,1,float16,float16,0,1.4593168258666993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,1,128,1,fp8,fp8,0,1.1865887641906738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,2,128,1,float16,fp8,0,1.1725104331970215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,2,128,1,float16,float16,0,1.423472023010254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,2,128,1,fp8,fp8,0,1.3019200325012208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,4,128,1,float16,fp8,0,1.1874176025390626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,4,128,1,float16,float16,0,1.3856032371520997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,4,128,1,fp8,fp8,0,1.2733856201171876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,16,128,1,float16,float16,0,0.7850719928741455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,8,128,1,float16,fp8,0,1.1884719848632812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,8,128,1,float16,float16,0,1.4356032371520997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,16,128,1,float16,fp8,0,0.6336336135864258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,16,128,1,fp8,fp8,0,0.7260000228881835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,1,128,1,float16,fp8,0,0.6064144134521484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,1,128,1,fp8,fp8,0,0.6002960205078125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,2,128,1,float16,fp8,0,0.5862751960754394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,2,128,1,float16,float16,0,0.732206392288208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,2,128,1,fp8,fp8,0,0.6253312110900879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,4,128,1,float16,fp8,0,0.5952591896057129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,4,128,1,fp8,fp8,0,0.58853120803833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,8,128,1,float16,fp8,0,0.6199567794799805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,8,128,1,fp8,fp8,0,0.6036176204681396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,8,128,1,fp8,fp8,0,1.288203239440918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,1,128,1,float16,float16,0,0.6819744110107422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,16,16,128,1,float16,fp8,0,1.2490863800048828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,4,128,1,float16,float16,0,0.695531177520752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,16,8,128,1,float16,float16,0,0.791926383972168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,1,128,1,float16,fp8,0,6.118635177612305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,1,128,1,fp8,fp8,0,6.196136093139648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,1,128,1,float16,float16,0,8.179017639160156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,2,128,1,float16,fp8,0,6.113294219970703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,2,128,1,fp8,fp8,0,6.193894577026367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,2,128,1,float16,float16,0,7.675027465820312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,4,128,1,float16,fp8,0,6.179496002197266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,4,128,1,float16,float16,0,7.9980621337890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,4,128,1,fp8,fp8,0,6.179289627075195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,16,128,1,float16,fp8,0,3.2096080780029297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,16,128,1,float16,float16,0,4.139654541015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,16,128,1,fp8,fp8,0,3.237287902832031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,8,128,1,float16,float16,0,8.151414489746093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,8,128,1,fp8,fp8,0,6.233710479736328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,1,128,1,float16,float16,0,3.8865936279296873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,1,128,1,float16,fp8,0,3.059449577331543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,1,128,1,fp8,fp8,0,3.02917594909668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,2,128,1,float16,fp8,0,3.0587791442871093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,2,128,1,float16,float16,0,3.939219284057617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,2,128,1,fp8,fp8,0,3.181110382080078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,16,8,128,1,float16,fp8,0,6.536988830566406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,4,128,1,float16,fp8,0,3.2171199798583983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,4,128,1,fp8,fp8,0,3.0646640777587892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,8,128,1,float16,fp8,0,3.1761968612670897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,16,128,1,float16,float16,0,1.876420783996582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,8,128,1,float16,float16,0,3.9143230438232424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,16,128,1,float16,fp8,0,1.5790800094604491
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,16,128,1,fp8,fp8,0,1.6336048126220704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,1,128,1,float16,float16,0,1.8186288833618165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,8,128,1,fp8,fp8,0,3.2899967193603517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,1,128,1,float16,fp8,0,1.545729637145996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,1,128,1,fp8,fp8,0,1.5116432189941407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,2,128,1,float16,fp8,0,1.5199024200439453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,16,4,128,1,float16,float16,0,3.7973873138427736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,2,128,1,float16,float16,0,1.8804800033569335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,2,128,1,fp8,fp8,0,1.6633712768554687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,4,128,1,float16,float16,0,1.859377670288086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,16,128,1,float16,float16,0,0.9427056312561035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,4,128,1,float16,fp8,0,1.5266783714294434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,4,128,1,fp8,fp8,0,1.5280464172363282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,8,128,1,float16,fp8,0,1.5483936309814452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,16,128,1,float16,fp8,0,0.9161104202270508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,8,128,1,fp8,fp8,0,1.634859275817871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,1,128,1,float16,fp8,0,0.7706064224243164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,1,128,1,float16,float16,0,0.8777024269104003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,1,128,1,fp8,fp8,0,0.7651616096496582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,2,128,1,float16,float16,0,0.932863998413086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,2,128,1,float16,fp8,0,0.7757855892181397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,2,128,1,fp8,fp8,0,0.7647647857666016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,4,128,1,float16,float16,0,0.9543328285217285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,4,128,1,float16,fp8,0,0.7708032131195068
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,4,128,1,fp8,fp8,0,0.8319503784179687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,16,128,1,float16,float16,0,0.47234401702880857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,8,128,1,float16,fp8,0,0.7752319812774658
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,8,128,1,fp8,fp8,0,0.8305631637573242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,16,128,1,fp8,fp8,0,0.4196671962738037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,1,128,1,float16,float16,0,0.4464911937713623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,1,128,1,fp8,fp8,0,0.39798080921173096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,2,128,1,float16,fp8,0,0.38580000400543213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,16,8,128,1,float16,float16,0,1.9315439224243165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,2,128,1,fp8,fp8,0,0.39147520065307617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,4,128,1,float16,fp8,0,0.3980191946029663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,4,128,1,float16,float16,0,0.4526639938354492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,4,128,1,fp8,fp8,0,0.393126392364502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,8,128,1,float16,fp8,0,0.3983216047286987
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,8,128,1,float16,float16,0,0.4603839874267578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,8,128,1,fp8,fp8,0,0.3947760105133057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,8,128,1,float16,float16,0,0.8999072074890136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,16,128,1,float16,fp8,0,0.420136022567749
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,1,128,1,float16,fp8,0,0.4009039878845215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,16,2,128,1,float16,float16,0,0.4434639930725098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,16,16,128,1,fp8,fp8,0,0.8515999794006348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,1,128,1,float16,fp8,0,3.5069503784179688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,1,128,1,fp8,fp8,0,3.5018463134765625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,1,128,1,float16,float16,0,4.456846237182617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,2,128,1,float16,fp8,0,3.480284881591797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,2,128,1,float16,float16,0,4.2543182373046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,2,128,1,fp8,fp8,0,3.487638473510742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,4,128,1,float16,fp8,0,3.505401611328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,4,128,1,float16,float16,0,4.30029296875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,4,128,1,fp8,fp8,0,3.549038314819336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,16,128,1,fp8,fp8,0,1.8366943359375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,1,128,1,float16,float16,0,2.043132781982422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,8,128,1,float16,float16,0,4.464640045166016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,16,128,1,float16,float16,0,2.235699272155762
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,1,128,1,fp8,fp8,0,1.7389551162719727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,16,128,1,float16,fp8,0,1.8396160125732421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,8,128,1,float16,fp8,0,3.6026657104492186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,16,8,128,1,fp8,fp8,0,3.576335906982422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,2,128,1,float16,float16,0,2.1893600463867187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,2,128,1,float16,fp8,0,1.8572864532470703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,2,128,1,fp8,fp8,0,1.995782470703125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,4,128,1,float16,fp8,0,1.7431840896606445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,4,128,1,float16,float16,0,2.128696060180664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,4,128,1,fp8,fp8,0,1.7879968643188477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,16,128,1,float16,float16,0,1.1172176361083985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,16,128,1,float16,fp8,0,0.9446720123291016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,1,128,1,float16,fp8,0,1.7589664459228516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,8,128,1,fp8,fp8,0,1.7980831146240235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,16,128,1,fp8,fp8,0,1.0568911552429199
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,8,128,1,float16,float16,0,2.212886428833008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,1,128,1,float16,float16,0,1.028540802001953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,1,128,1,fp8,fp8,0,0.8873680114746094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,1,128,1,float16,fp8,0,0.9633312225341797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,2,128,1,float16,float16,0,1.0111040115356444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,2,128,1,fp8,fp8,0,0.9036879539489746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,4,128,1,float16,float16,0,1.1328495979309081
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,4,128,1,float16,fp8,0,0.9276384353637696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,4,128,1,fp8,fp8,0,0.8890975952148438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,8,128,1,float16,float16,0,1.051308822631836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,16,128,1,float16,float16,0,0.6401231765747071
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,8,128,1,fp8,fp8,0,0.9243359565734863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,16,128,1,float16,fp8,0,0.4932112216949463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,16,128,1,fp8,fp8,0,0.4778639793395996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,1,128,1,float16,float16,0,0.5129695892333984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,1,128,1,float16,fp8,0,0.4984384059906006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,1,128,1,fp8,fp8,0,0.4571631908416748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,2,128,1,float16,fp8,0,0.44469599723815917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,2,128,1,float16,float16,0,0.5178095817565918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,2,128,1,fp8,fp8,0,0.4633967876434326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,4,128,1,float16,fp8,0,0.47672481536865235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,4,128,1,float16,float16,0,0.5146687984466553
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,4,128,1,fp8,fp8,0,0.44747200012207033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,16,8,128,1,float16,fp8,0,1.9392240524291993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,16,128,1,float16,fp8,0,0.24547839164733887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,16,128,1,float16,float16,0,0.3038320064544678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,8,128,1,float16,fp8,0,0.46923041343688965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,8,128,1,fp8,fp8,0,0.46851840019226076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,16,128,1,fp8,fp8,0,0.24572479724884033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,1,128,1,float16,float16,0,0.2593168020248413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,1,128,1,float16,fp8,0,0.23131198883056642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,1,128,1,fp8,fp8,0,0.23178400993347167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,2,128,1,float16,float16,0,0.2604464054107666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,2,128,1,float16,fp8,0,0.22615199089050292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,2,128,1,fp8,fp8,0,0.22973120212554932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,4,128,1,float16,fp8,0,0.22848000526428222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,4,128,1,fp8,fp8,0,0.22811999320983886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,8,128,1,float16,fp8,0,0.23387041091918945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,8,128,1,fp8,fp8,0,0.23342559337615967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,8,128,1,float16,fp8,0,0.9184927940368652
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,16,8,128,1,float16,float16,0,0.5485455989837646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,4,128,1,float16,float16,0,0.26762239933013915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,1,128,1,float16,fp8,0,3.260031890869141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,16,8,128,1,float16,float16,0,0.27660479545593264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,1,128,1,float16,float16,0,4.038702392578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,1,128,1,fp8,fp8,0,3.242839813232422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,2,128,1,float16,float16,0,4.110283279418946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,2,128,1,float16,fp8,0,3.2699169158935546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,2,128,1,fp8,fp8,0,3.265367889404297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,16,2,128,1,float16,fp8,0,0.8751071929931641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,4,128,1,fp8,fp8,0,3.298771286010742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,4,128,1,float16,fp8,0,3.431296157836914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,4,128,1,float16,float16,0,4.068056106567383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,16,128,1,float16,fp8,0,1.758897590637207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,16,128,1,float16,float16,0,2.1770256042480467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,16,128,1,fp8,fp8,0,1.7695135116577148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,1,128,1,float16,float16,0,1.9381376266479493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,8,128,1,float16,float16,0,4.287731170654297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,8,128,1,float16,fp8,0,3.4767871856689454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,16,8,128,1,fp8,fp8,0,3.4350353240966798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,1,128,1,fp8,fp8,0,1.638852882385254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,2,128,1,float16,float16,0,2.086347198486328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,2,128,1,float16,fp8,0,1.6418191909790039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,2,128,1,fp8,fp8,0,1.723731231689453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,4,128,1,float16,fp8,0,1.6635984420776366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,4,128,1,float16,float16,0,2.04626407623291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,4,128,1,fp8,fp8,0,1.6760992050170898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,16,128,1,float16,fp8,0,0.9159983634948731
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,16,128,1,float16,float16,0,1.060739231109619
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,8,128,1,float16,float16,0,2.068903923034668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,16,128,1,fp8,fp8,0,0.9787872314453125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,1,128,1,float16,float16,0,0.9566191673278809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,8,128,1,float16,fp8,0,1.7991968154907227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,8,128,1,fp8,fp8,0,1.6820175170898437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,16,1,128,1,float16,fp8,0,1.7344303131103516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,1,128,1,float16,fp8,0,0.8294015884399414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,1,128,1,fp8,fp8,0,0.9293503761291504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,2,128,1,float16,fp8,0,0.8585727691650391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,2,128,1,float16,float16,0,1.052625560760498
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,4,128,1,float16,fp8,0,0.8413503646850586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,4,128,1,float16,float16,0,0.9693632125854492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,4,128,1,fp8,fp8,0,0.8268032073974609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,16,128,1,float16,fp8,0,0.45665922164916994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,8,128,1,float16,float16,0,1.0128160476684571
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,16,128,1,float16,float16,0,0.5694032192230225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,8,128,1,float16,fp8,0,0.8771856307983399
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,1,128,1,float16,float16,0,0.47394561767578125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,8,128,1,fp8,fp8,0,0.8617648124694824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,1,128,1,float16,fp8,0,0.41753277778625486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,1,128,1,fp8,fp8,0,0.46503357887268065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,2,128,1,float16,float16,0,0.494704008102417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,2,128,1,fp8,fp8,0,0.4170703887939453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,4,128,1,float16,float16,0,0.48781919479370117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,4,128,1,float16,fp8,0,0.4211264133453369
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,4,128,1,fp8,fp8,0,0.44478559494018555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,8,128,1,float16,float16,0,0.5110432147979737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,8,128,1,float16,fp8,0,0.4307072162628174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,8,128,1,fp8,fp8,0,0.43110241889953616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,16,128,1,float16,float16,0,0.2743936061859131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,16,128,1,float16,fp8,0,0.24141919612884521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,16,128,1,fp8,fp8,0,0.23993918895721436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,1,128,1,float16,fp8,0,0.21380319595336914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,1,128,1,float16,float16,0,0.24300000667572022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,2,128,1,float16,float16,0,0.24863359928131104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,2,128,1,float16,fp8,0,0.22051680088043213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,2,128,1,fp8,fp8,0,0.2140415906906128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,4,128,1,float16,float16,0,0.24868319034576417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,4,128,1,float16,fp8,0,0.21845440864562987
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,4,128,1,fp8,fp8,0,0.21905438899993895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,8,128,1,float16,float16,0,0.2575984001159668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,16,128,1,float16,float16,0,0.144758403301239
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,8,128,1,float16,fp8,0,0.22296481132507323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,8,128,1,fp8,fp8,0,0.22102720737457277
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,16,128,1,float16,fp8,0,0.12256799936294556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,16,128,1,fp8,fp8,0,0.1234928011894226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,1,128,1,float16,float16,0,0.12524479627609253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,1,128,1,float16,fp8,0,0.11158080101013183
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,16,2,128,1,fp8,fp8,0,0.8814175605773926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,2,128,1,float16,float16,0,0.12649279832839966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,2,128,1,float16,fp8,0,0.11257760524749756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,2,128,1,fp8,fp8,0,0.11145919561386108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,4,128,1,float16,float16,0,0.1307904005050659
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,4,128,1,float16,fp8,0,0.11159839630126953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,4,128,1,fp8,fp8,0,0.11195679903030395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,8,128,1,float16,float16,0,0.13352960348129272
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,8,128,1,fp8,fp8,0,0.1132591962814331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,16,128,1,fp8,fp8,0,0.47945280075073243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,16,2,128,1,float16,fp8,0,0.4215695858001709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,16,1,128,1,fp8,fp8,0,0.21400799751281738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,1,128,1,float16,fp8,0,1.9194032669067382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,1,128,1,float16,float16,0,2.2901248931884766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,2,128,1,float16,float16,0,2.2342464447021486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,1,128,1,fp8,fp8,0,0.11101919412612915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,16,8,128,1,float16,fp8,0,0.11474080085754394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,2,128,1,float16,fp8,0,1.924991989135742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,2,128,1,fp8,fp8,0,1.9514175415039063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,4,128,1,float16,float16,0,2.485425567626953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,4,128,1,fp8,fp8,0,1.9379919052124024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,4,128,1,float16,fp8,0,2.0293983459472655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,1,128,1,fp8,fp8,0,1.9348720550537108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,8,128,1,float16,fp8,0,2.0034767150878907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,8,128,1,fp8,fp8,0,2.0345136642456056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,16,128,1,float16,float16,0,1.3851344108581543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,16,128,1,fp8,fp8,0,1.0854111671447755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,1,128,1,float16,float16,0,1.1061792373657227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,1,128,1,float16,fp8,0,0.9843168258666992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,2,128,1,float16,fp8,0,0.9699248313903809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,2,128,1,float16,float16,0,1.239508819580078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,2,128,1,fp8,fp8,0,0.9840559959411621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,4,128,1,float16,fp8,0,0.9764127731323242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,4,128,1,float16,float16,0,1.1453184127807616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,4,128,1,fp8,fp8,0,1.0616911888122558
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,8,128,1,float16,float16,0,1.1815263748168945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,16,128,1,float16,float16,0,0.6693327903747559
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,16,8,128,1,float16,float16,0,2.419238471984863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,8,128,1,float16,fp8,0,1.006924819946289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,16,128,1,float16,fp8,0,1.0651904106140138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,8,128,1,fp8,fp8,0,1.042636775970459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,16,128,1,float16,fp8,0,0.5642767906188965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,16,128,1,fp8,fp8,0,0.5455776214599609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,1,128,1,fp8,fp8,0,0.489302396774292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,2,128,1,float16,fp8,0,0.498795223236084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,2,128,1,float16,float16,0,0.5620751857757569
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,1,128,1,float16,fp8,0,0.5080239772796631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,2,128,1,fp8,fp8,0,0.5105584144592286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,16,1,128,1,fp8,fp8,0,0.9907936096191406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,4,128,1,float16,float16,0,0.578323221206665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,4,128,1,float16,fp8,0,0.49426560401916503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,4,128,1,fp8,fp8,0,0.5024655818939209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,8,128,1,float16,float16,0,0.5976304054260254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,16,128,1,float16,fp8,0,0.2779279947280884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,16,128,1,float16,float16,0,0.33884639739990235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,16,128,1,fp8,fp8,0,0.2784303903579712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,1,128,1,float16,float16,0,0.28285439014434816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,1,128,1,float16,fp8,0,0.26327199935913087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,1,128,1,fp8,fp8,0,0.2513279914855957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,2,128,1,float16,float16,0,0.2874351978302002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,2,128,1,float16,fp8,0,0.2523200035095215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,2,128,1,fp8,fp8,0,0.2524912118911743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,4,128,1,float16,fp8,0,0.25495359897613523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,4,128,1,float16,float16,0,0.3025264024734497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,4,128,1,fp8,fp8,0,0.2532128095626831
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,8,128,1,float16,fp8,0,0.26190240383148194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,16,128,1,float16,float16,0,0.1845744013786316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,8,128,1,fp8,fp8,0,0.26160640716552735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,16,128,1,fp8,fp8,0,0.1452255964279175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,1,128,1,float16,float16,0,0.1524656057357788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,1,128,1,float16,fp8,0,0.12980159521102905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,2,128,1,float16,float16,0,0.15385119915008544
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,2,128,1,float16,fp8,0,0.1305199980735779
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,2,128,1,fp8,fp8,0,0.13057119846343995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,4,128,1,float16,float16,0,0.15306880474090576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,4,128,1,float16,fp8,0,0.1315551996231079
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,4,128,1,fp8,fp8,0,0.13134080171585083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,8,128,1,float16,float16,0,0.15890400409698485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,1,128,1,float16,float16,0,0.5550271987915039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,8,128,1,float16,fp8,0,0.13632160425186157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,16,128,1,float16,float16,0,0.09623519778251648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,16,128,1,fp8,fp8,0,0.07828800082206726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,1,128,1,float16,float16,0,0.07959200143814087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,1,128,1,float16,fp8,0,0.07187359929084777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,8,128,1,fp8,fp8,0,0.5084144115447998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,1,128,1,fp8,fp8,0,0.07205119729042053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,2,128,1,float16,float16,0,0.08046079874038696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,2,128,1,float16,fp8,0,0.07205920219421387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,2,128,1,fp8,fp8,0,0.07245919704437256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,4,128,1,float16,fp8,0,0.07233920097351074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,4,128,1,fp8,fp8,0,0.07232159972190857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,16,8,128,1,float16,float16,0,0.30358879566192626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,16,128,1,float16,fp8,0,0.1438431978225708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,8,128,1,float16,fp8,0,0.0735040009021759
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,8,128,1,fp8,fp8,0,0.0726751983165741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,1,128,1,fp8,fp8,0,0.1299872040748596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,16,128,1,float16,fp8,0,0.07866560220718384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,1,128,1,float16,fp8,0,1.8842655181884767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,1,128,1,float16,float16,0,2.1935375213623045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,16,8,128,1,float16,fp8,0,0.5135903835296631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,1,128,1,fp8,fp8,0,1.9006832122802735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,4,128,1,float16,float16,0,0.08217440247535705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,16,8,128,1,float16,float16,0,0.08825600147247314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,2,128,1,float16,float16,0,2.16345272064209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,2,128,1,float16,fp8,0,1.8910272598266602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,2,128,1,fp8,fp8,0,1.9683839797973632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,4,128,1,float16,fp8,0,1.9290351867675781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,4,128,1,float16,float16,0,2.270689582824707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,4,128,1,fp8,fp8,0,1.9130271911621093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,16,8,128,1,fp8,fp8,0,0.1359455943107605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,16,128,1,float16,fp8,0,1.0861663818359375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,16,128,1,float16,float16,0,1.315884780883789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,8,128,1,float16,float16,0,2.409449577331543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,8,128,1,fp8,fp8,0,2.0094655990600585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,16,8,128,1,float16,fp8,0,2.0505807876586912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,16,128,1,fp8,fp8,0,1.0834704399108888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,1,128,1,float16,float16,0,1.0701663970947266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,1,128,1,float16,fp8,0,0.9519696235656738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,1,128,1,fp8,fp8,0,0.9988911628723145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,2,128,1,float16,float16,0,1.1316720008850099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,2,128,1,fp8,fp8,0,0.954366397857666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,2,128,1,float16,fp8,0,0.9722127914428711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,4,128,1,float16,fp8,0,0.9742447853088378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,4,128,1,float16,float16,0,1.1062527656555177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,4,128,1,fp8,fp8,0,0.9625231742858886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,8,128,1,float16,float16,0,1.1880335807800293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,16,128,1,float16,fp8,0,0.5522592067718506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,16,128,1,float16,float16,0,0.6682928085327149
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,16,128,1,fp8,fp8,0,0.5498271942138672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,8,128,1,float16,fp8,0,1.0460288047790527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,16,8,128,1,fp8,fp8,0,1.001854419708252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,1,128,1,fp8,fp8,0,0.4822991847991943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,2,128,1,float16,float16,0,0.5665264129638672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,2,128,1,float16,fp8,0,0.4826655864715576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,2,128,1,fp8,fp8,0,0.4955455780029297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,4,128,1,float16,float16,0,0.565009593963623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,4,128,1,float16,fp8,0,0.4890160083770752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,4,128,1,fp8,fp8,0,0.5092144012451172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,8,128,1,float16,float16,0,0.5973328113555908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,8,128,1,float16,fp8,0,0.5139647960662842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,8,128,1,fp8,fp8,0,0.5227744102478027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,16,128,1,float16,fp8,0,0.2936863899230957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,16,128,1,fp8,fp8,0,0.28353760242462156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,2,128,1,float16,float16,0,0.27830240726470945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,2,128,1,float16,fp8,0,0.24515678882598876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,2,128,1,fp8,fp8,0,0.2452944040298462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,4,128,1,float16,float16,0,0.2857280015945435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,1,128,1,float16,float16,0,0.5384128093719482
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,4,128,1,float16,fp8,0,0.2490015983581543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,4,128,1,fp8,fp8,0,0.25063838958740237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,16,1,128,1,float16,fp8,0,0.5077519893646241
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,8,128,1,float16,fp8,0,0.2607167959213257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,8,128,1,float16,float16,0,0.3019968032836914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,8,128,1,fp8,fp8,0,0.26131200790405273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,16,128,1,float16,float16,0,0.17743680477142335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,16,128,1,float16,fp8,0,0.14728480577468872
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,16,128,1,fp8,fp8,0,0.1478816032409668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,1,128,1,float16,float16,0,0.14374879598617554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,1,128,1,float16,fp8,0,0.12816319465637208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,16,128,1,float16,float16,0,0.33929920196533203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,1,128,1,fp8,fp8,0,0.12830719947814942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,2,128,1,float16,float16,0,0.1462496042251587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,2,128,1,float16,fp8,0,0.12813760042190553
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,4,128,1,fp8,fp8,0,0.13055200576782228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,4,128,1,float16,fp8,0,0.1299471974372864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,2,128,1,fp8,fp8,0,0.1287727952003479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,1,128,1,float16,float16,0,0.27779359817504884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,8,128,1,float16,float16,0,0.1574463963508606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,8,128,1,fp8,fp8,0,0.13575520515441894
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,16,128,1,float16,fp8,0,0.07996159791946411
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,1,128,1,float16,float16,0,0.07397760152816772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,16,128,1,fp8,fp8,0,0.0798416018486023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,1,128,1,float16,fp8,0,0.0688704013824463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,1,128,1,fp8,fp8,0,0.06794720292091369
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,2,128,1,float16,float16,0,0.07586719989776611
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,2,128,1,fp8,fp8,0,0.06800799965858459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,4,128,1,float16,float16,0,0.07928000092506408
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,4,128,1,float16,fp8,0,0.06797599792480469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,4,128,1,fp8,fp8,0,0.06829919815063476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,8,128,1,float16,float16,0,0.08423200249671936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,8,128,1,float16,fp8,0,0.07106239795684814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,8,128,1,fp8,fp8,0,0.07103840112686158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,16,128,1,float16,float16,0,0.05035840272903443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,16,128,1,fp8,fp8,0,0.04192480146884918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,1,128,1,float16,float16,0,0.04073440134525299
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,1,128,1,float16,fp8,0,0.038206401467323306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,1,128,1,fp8,fp8,0,0.03820480108261108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,2,128,1,float16,float16,0,0.04107680022716522
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,2,128,1,float16,fp8,0,0.03852640092372894
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,2,128,1,fp8,fp8,0,0.03853600025177002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,4,128,1,float16,float16,0,0.04208639860153198
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,4,128,1,float16,fp8,0,0.03887679874897003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,4,128,1,fp8,fp8,0,0.038894400000572205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,8,128,1,float16,float16,0,0.04344959855079651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,4,128,1,float16,float16,0,0.14968160390853882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,8,128,1,float16,fp8,0,0.03964160084724426
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,1,128,1,float16,fp8,0,0.2473695993423462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,16,8,128,1,float16,fp8,0,0.13589279651641845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,2,128,1,float16,fp8,0,0.06841279864311219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,16,128,1,float16,fp8,0,0.041894400119781496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,1,128,1,float16,float16,0,1.3091903686523438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,1,128,1,fp8,fp8,0,1.1824527740478517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,2,128,1,float16,float16,0,1.3179823875427246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,2,128,1,float16,fp8,0,1.1792431831359864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,2,128,1,fp8,fp8,0,1.1742239952087403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,4,128,1,float16,float16,0,1.3656944274902343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,16,1,128,1,fp8,fp8,0,0.2511199951171875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,16,16,128,1,float16,float16,0,0.09289439916610717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,4,128,1,float16,fp8,0,1.2832575798034669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,4,128,1,fp8,fp8,0,1.190244770050049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,8,128,1,float16,float16,0,1.4655247688293458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,8,128,1,float16,fp8,0,1.2931792259216308
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,16,128,1,float16,float16,0,0.8651103973388672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,8,128,1,fp8,fp8,0,1.2504863739013672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,16,128,1,float16,fp8,0,0.6950384140014648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,16,1,128,1,float16,fp8,0,1.1724191665649415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,16,128,1,fp8,fp8,0,0.7478847980499268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,1,128,1,float16,float16,0,0.6617568016052247
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,1,128,1,fp8,fp8,0,0.5867824077606201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,2,128,1,float16,float16,0,0.6635888099670411
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,2,128,1,float16,fp8,0,0.5916368007659912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,2,128,1,fp8,fp8,0,0.6180480003356934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,4,128,1,float16,fp8,0,0.5964064121246337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,4,128,1,float16,float16,0,0.6882863998413086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,4,128,1,fp8,fp8,0,0.6043424129486084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,8,128,1,float16,float16,0,0.740721607208252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,16,8,128,1,fp8,fp8,0,0.0395440012216568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,8,128,1,fp8,fp8,0,0.6279136180877686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,8,128,1,float16,fp8,0,0.6466464042663574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,16,128,1,float16,fp8,0,0.3537136077880859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,16,128,1,fp8,fp8,0,0.355513596534729
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,1,128,1,fp8,fp8,0,0.3061232089996338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,2,128,1,float16,float16,0,0.34029760360717776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,2,128,1,float16,fp8,0,0.3020911931991577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,2,128,1,fp8,fp8,0,0.3057039976119995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,4,128,1,float16,float16,0,0.35164000988006594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,4,128,1,float16,fp8,0,0.3065471887588501
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,4,128,1,fp8,fp8,0,0.3086911916732788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,8,128,1,float16,float16,0,0.37553598880767824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,8,128,1,float16,fp8,0,0.32181758880615235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,8,128,1,fp8,fp8,0,0.32508800029754636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,16,1,128,1,float16,fp8,0,0.6002111911773682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,16,128,1,fp8,fp8,0,0.18552000522613527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,1,128,1,float16,float16,0,0.17454400062561035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,1,128,1,float16,fp8,0,0.15797920227050782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,1,128,1,fp8,fp8,0,0.15777120590209961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,2,128,1,float16,float16,0,0.1770624041557312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,2,128,1,float16,fp8,0,0.1583824038505554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,2,128,1,fp8,fp8,0,0.1581936001777649
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,4,128,1,float16,float16,0,0.1827712059020996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,4,128,1,float16,fp8,0,0.1607983946800232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,4,128,1,fp8,fp8,0,0.16036959886550903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,8,128,1,float16,float16,0,0.1943727970123291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,8,128,1,float16,fp8,0,0.16920000314712524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,8,128,1,fp8,fp8,0,0.1689311981201172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,16,128,1,float16,float16,0,0.11648800373077392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,16,128,1,float16,fp8,0,0.10009919404983521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,16,128,1,fp8,fp8,0,0.09996160268783569
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,1,128,1,float16,fp8,0,0.08332800269126892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,16,128,1,float16,float16,0,0.4247039794921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,1,128,1,fp8,fp8,0,0.08307039737701416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,2,128,1,float16,float16,0,0.09466239809989929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,4,128,1,float16,float16,0,0.0978272020816803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,2,128,1,fp8,fp8,0,0.08397600054740906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,1,128,1,float16,fp8,0,0.29915359020233157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,4,128,1,float16,fp8,0,0.08604159951210022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,4,128,1,fp8,fp8,0,0.08621439933776856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,8,128,1,float16,float16,0,0.10238879919052124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,8,128,1,float16,fp8,0,0.0920415997505188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,8,128,1,fp8,fp8,0,0.09196479916572571
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,16,128,1,float16,fp8,0,0.05491039752960205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,16,128,1,fp8,fp8,0,0.05433599948883057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,1,128,1,float16,float16,0,0.05002560019493103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,1,128,1,float16,fp8,0,0.04620800018310547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,1,128,1,fp8,fp8,0,0.046614399552345274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,2,128,1,float16,float16,0,0.049886399507522584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,2,128,1,float16,fp8,0,0.04579040110111236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,2,128,1,fp8,fp8,0,0.0463919997215271
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,16,128,1,float16,fp8,0,0.1843008041381836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,4,128,1,float16,float16,0,0.05144799947738647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,4,128,1,fp8,fp8,0,0.04596480131149292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,8,128,1,float16,float16,0,0.05758399963378906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,8,128,1,fp8,fp8,0,0.048419201374053956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,16,128,1,float16,float16,0,0.033529600501060484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,16,128,1,float16,fp8,0,0.029230400919914246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,16,128,1,fp8,fp8,0,0.02961280047893524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,1,128,1,float16,float16,0,0.02993920147418976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,1,128,1,float16,fp8,0,0.027993598580360414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,1,128,1,fp8,fp8,0,0.02807680070400238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,2,128,1,float16,float16,0,0.030059200525283814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,1,128,1,float16,float16,0,0.09284319877624511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,2,128,1,float16,fp8,0,0.02786239981651306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,2,128,1,fp8,fp8,0,0.02810400128364563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,4,128,1,float16,fp8,0,0.028303998708724975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,16,2,128,1,float16,fp8,0,0.08379520177841186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,4,128,1,fp8,fp8,0,0.028164801001548768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,16,1,128,1,float16,float16,0,0.33378560543060304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,8,128,1,fp8,fp8,0,0.028590399026870727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,16,128,1,float16,float16,0,0.06417120099067689
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,16,16,128,1,float16,float16,0,0.2202768087387085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,4,128,1,float16,fp8,0,0.04608319997787476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,1,128,1,float16,float16,0,1.321183967590332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,16,8,128,1,float16,fp8,0,0.047870400547981265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,1,128,1,fp8,fp8,0,1.221836757659912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,2,128,1,float16,fp8,0,1.2242207527160645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,2,128,1,float16,float16,0,1.3530832290649415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,4,128,1,float16,float16,0,0.03043360114097595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,2,128,1,fp8,fp8,0,1.2224623680114746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,8,128,1,float16,float16,0,0.031142398715019226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,4,128,1,float16,float16,0,1.4053215980529785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,4,128,1,float16,fp8,0,1.2474047660827636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,4,128,1,fp8,fp8,0,1.2689824104309082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,8,128,1,float16,float16,0,1.5553647994995117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,8,128,1,float16,fp8,0,1.3416223526000977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,1,128,1,float16,fp8,0,1.213209629058838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,16,128,1,float16,float16,0,0.9132335662841797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,16,128,1,float16,fp8,0,0.7638256072998046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,16,128,1,fp8,fp8,0,0.7790016174316406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,16,8,128,1,fp8,fp8,0,1.3300095558166505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,1,128,1,float16,float16,0,0.6733583927154541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,1,128,1,float16,fp8,0,0.6133232116699219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,1,128,1,fp8,fp8,0,0.6178559780120849
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,2,128,1,float16,float16,0,0.6838111877441406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,2,128,1,float16,fp8,0,0.6384528160095215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,4,128,1,float16,float16,0,0.7131199836730957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,4,128,1,float16,fp8,0,0.6261536121368408
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,4,128,1,fp8,fp8,0,0.6309040069580079
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,16,8,128,1,float16,fp8,0,0.02863520085811615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,8,128,1,float16,float16,0,0.7803391933441162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,16,128,1,float16,float16,0,0.4633535861968994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,8,128,1,float16,fp8,0,0.6795455932617187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,16,128,1,float16,fp8,0,0.386190390586853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,8,128,1,fp8,fp8,0,0.6762735843658447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,1,128,1,float16,float16,0,0.34395039081573486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,1,128,1,float16,fp8,0,0.3124847888946533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,2,128,1,float16,float16,0,0.3473599910736084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,2,128,1,float16,fp8,0,0.3180880069732666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,2,128,1,fp8,fp8,0,0.3189296007156372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,4,128,1,float16,float16,0,0.36005439758300783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,4,128,1,float16,fp8,0,0.32262239456176756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,8,128,1,float16,float16,0,0.3988480091094971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,8,128,1,float16,fp8,0,0.33964641094207765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,8,128,1,fp8,fp8,0,0.343123197555542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,16,128,1,float16,float16,0,0.24090080261230468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,16,128,1,float16,fp8,0,0.19824960231781005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,16,128,1,fp8,fp8,0,0.1995519995689392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,1,128,1,float16,float16,0,0.17949119806289673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,1,128,1,fp8,fp8,0,0.16375999450683593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,2,128,1,float16,float16,0,0.18285280466079712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,2,128,1,float16,fp8,0,0.16400959491729736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,2,128,1,fp8,fp8,0,0.16615840196609497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,4,128,1,float16,float16,0,0.18844159841537475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,16,2,128,1,fp8,fp8,0,0.6200223922729492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,4,128,1,float16,fp8,0,0.1694831967353821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,4,128,1,fp8,fp8,0,0.16752320528030396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,8,128,1,float16,float16,0,0.20489919185638428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,8,128,1,float16,fp8,0,0.18001439571380615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,8,128,1,fp8,fp8,0,0.17819839715957642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,16,128,1,float16,float16,0,0.12589759826660157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,16,128,1,float16,fp8,0,0.10721280574798583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,16,128,1,fp8,fp8,0,0.10696799755096435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,1,128,1,float16,fp8,0,0.08878560066223144
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,1,128,1,fp8,fp8,0,0.08796160221099854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,16,128,1,fp8,fp8,0,0.3861583948135376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,2,128,1,float16,fp8,0,0.08798879981040955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,1,128,1,fp8,fp8,0,0.31748960018157957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,2,128,1,fp8,fp8,0,0.0877344012260437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,4,128,1,float16,float16,0,0.10133600234985352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,4,128,1,float16,fp8,0,0.09011679887771606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,4,128,1,fp8,fp8,0,0.08968639969825745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,8,128,1,float16,float16,0,0.10914080142974854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,8,128,1,float16,fp8,0,0.09564160108566284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,16,128,1,float16,float16,0,0.06821920275688172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,16,4,128,1,fp8,fp8,0,0.32228960990905764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,16,128,1,fp8,fp8,0,0.058203202486038205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,1,128,1,float16,float16,0,0.04969600141048432
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,1,128,1,fp8,fp8,0,0.047279998660087585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,2,128,1,float16,float16,0,0.0511568009853363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,2,128,1,float16,fp8,0,0.04704160094261169
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,2,128,1,fp8,fp8,0,0.04771519899368286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,16,1,128,1,float16,fp8,0,0.16223679780960082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,4,128,1,fp8,fp8,0,0.046881601214408875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,8,128,1,float16,float16,0,0.06019520163536072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,8,128,1,float16,fp8,0,0.05134720206260681
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,8,128,1,fp8,fp8,0,0.051367998123168945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,16,128,1,float16,float16,0,0.03827199935913086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,16,128,1,float16,fp8,0,0.030024001002311708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,16,128,1,fp8,fp8,0,0.029905599355697633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,1,128,1,float16,float16,0,0.02788960039615631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,1,128,1,float16,fp8,0,0.027569600939750673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,1,128,1,fp8,fp8,0,0.027592000365257264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,1,128,1,float16,float16,0,0.09490879774093627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,2,128,1,float16,float16,0,0.028275200724601747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,2,128,1,fp8,fp8,0,0.02740960121154785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,2,128,1,float16,fp8,0,0.02783840000629425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,2,128,1,float16,float16,0,0.09773120284080505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,4,128,1,float16,fp8,0,0.028065600991249086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,8,128,1,float16,float16,0,0.030553600192070006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,4,128,1,fp8,fp8,0,0.02874560058116913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,8,128,1,float16,fp8,0,0.02871040105819702
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,8,128,1,fp8,fp8,0,0.028987199068069458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,16,128,1,float16,float16,0,0.025228801369667053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,16,128,1,float16,fp8,0,0.02378080040216446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,16,128,1,fp8,fp8,0,0.023419199883937834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,1,128,1,float16,float16,0,0.0235615998506546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,1,128,1,float16,fp8,0,0.0224031999707222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,16,128,1,float16,fp8,0,0.058499199151992795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,2,128,1,float16,float16,0,0.023387199640274046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,2,128,1,float16,fp8,0,0.02268799990415573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,1,128,1,float16,fp8,0,0.0471455991268158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,2,128,1,fp8,fp8,0,0.02280000001192093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,4,128,1,float16,float16,0,0.023689599335193635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,4,128,1,float16,fp8,0,0.022676800191402436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,4,128,1,fp8,fp8,0,0.02263039946556091
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,8,128,1,float16,float16,0,0.024545599520206452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,8,128,1,float16,fp8,0,0.022844800353050233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,8,128,1,fp8,fp8,0,0.023238399624824525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,4,128,1,float16,float16,0,0.05553600192070007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,1,128,1,float16,float16,0,0.9456303596496582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,1,128,1,float16,fp8,0,0.914572811126709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,1,128,1,fp8,fp8,0,0.9138591766357422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,16,4,128,1,float16,float16,0,0.02853600084781647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,2,128,1,float16,float16,0,0.9699295997619629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,2,128,1,float16,fp8,0,0.9170448303222656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,2,128,1,fp8,fp8,0,0.9163007736206055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,16,8,128,1,fp8,fp8,0,0.09522719979286194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,16,1,128,1,fp8,fp8,0,0.022526399791240694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,4,128,1,float16,float16,0,1.019215965270996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,4,128,1,fp8,fp8,0,0.9394335746765137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,16,4,128,1,float16,fp8,0,0.04758400022983551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,8,128,1,float16,float16,0,1.1563728332519532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,8,128,1,fp8,fp8,0,1.0318448066711425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,16,128,1,float16,float16,0,0.7155119895935058
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,1,128,1,float16,float16,0,0.4786079883575439
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,16,128,1,fp8,fp8,0,0.6195759773254395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,1,128,1,float16,fp8,0,0.4601424217224121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,1,128,1,fp8,fp8,0,0.46394882202148435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,2,128,1,float16,float16,0,0.4901247978210449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,2,128,1,float16,fp8,0,0.46710882186889646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,2,128,1,fp8,fp8,0,0.4614016056060791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,4,128,1,float16,float16,0,0.5195216178894043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,4,128,1,float16,fp8,0,0.47747201919555665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,4,128,1,fp8,fp8,0,0.47879681587219236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,8,128,1,float16,float16,0,0.5804224014282227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,8,128,1,float16,fp8,0,0.5225008010864258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,16,128,1,float16,float16,0,0.36710081100463865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,8,128,1,fp8,fp8,0,0.5247312068939209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,16,128,1,fp8,fp8,0,0.3123568058013916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,1,128,1,float16,float16,0,0.24642400741577147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,1,128,1,float16,fp8,0,0.23757119178771974
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,4,128,1,float16,fp8,0,0.9452672004699707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,1,128,1,fp8,fp8,0,0.2397711992263794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,2,128,1,float16,float16,0,0.2498095989227295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,2,128,1,float16,fp8,0,0.2390575885772705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,2,128,1,fp8,fp8,0,0.23890080451965331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,4,128,1,float16,fp8,0,0.24619359970092775
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,4,128,1,float16,float16,0,0.2667088031768799
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,4,128,1,fp8,fp8,0,0.2431391954421997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,8,128,1,float16,float16,0,0.2997119903564453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,8,128,1,float16,fp8,0,0.2679919958114624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,16,16,128,1,float16,fp8,0,0.616758394241333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,16,128,1,float16,fp8,0,0.16281440258026122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,8,128,1,fp8,fp8,0,0.2693471908569336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,1,128,1,float16,float16,0,0.12938400506973266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,1,128,1,fp8,fp8,0,0.12360960245132446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,2,128,1,float16,fp8,0,0.1253376007080078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,2,128,1,fp8,fp8,0,0.12434240579605102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,4,128,1,float16,float16,0,0.14027999639511107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,4,128,1,float16,fp8,0,0.12792160511016845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,4,128,1,fp8,fp8,0,0.1285423994064331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,8,128,1,float16,float16,0,0.15503519773483276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,8,128,1,float16,fp8,0,0.14082560539245606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,8,128,1,fp8,fp8,0,0.13948800563812255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,16,128,1,float16,float16,0,0.10191680192947387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,16,128,1,float16,fp8,0,0.08760640025138855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,16,128,1,fp8,fp8,0,0.08845760226249695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,1,128,1,float16,float16,0,0.07064160108566284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,1,128,1,float16,fp8,0,0.06851840019226074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,16,16,128,1,float16,fp8,0,0.31574881076812744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,2,128,1,float16,float16,0,0.072953599691391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,2,128,1,float16,fp8,0,0.06888960003852844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,2,128,1,fp8,fp8,0,0.06897280216217042
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,4,128,1,float16,float16,0,0.07692000269889832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,4,128,1,float16,fp8,0,0.07064319849014282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,4,128,1,fp8,fp8,0,0.06989759802818299
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,8,128,1,float16,float16,0,0.0852895975112915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,8,128,1,float16,fp8,0,0.07658720016479492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,8,128,1,fp8,fp8,0,0.07647519707679748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,16,128,1,float16,float16,0,0.05759040117263794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,16,128,1,float16,fp8,0,0.05076320171356201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,16,128,1,fp8,fp8,0,0.05063040256500244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,1,128,1,float16,float16,0,0.040064001083374025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,1,128,1,float16,fp8,0,0.037857601046562196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,16,8,128,1,float16,fp8,0,1.0308591842651367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,16,128,1,fp8,fp8,0,0.16444319486618042
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,2,128,1,float16,float16,0,0.040424001216888425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,4,128,1,float16,float16,0,0.0443120002746582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,2,128,1,float16,fp8,0,0.03797439932823181
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,2,128,1,float16,float16,0,0.13383840322494506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,4,128,1,float16,fp8,0,0.03799839913845062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,8,128,1,float16,float16,0,0.04978559911251068
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,8,128,1,float16,fp8,0,0.041740798950195314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,8,128,1,fp8,fp8,0,0.04217279851436615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,16,128,1,float16,float16,0,0.03409120142459869
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,16,128,1,float16,fp8,0,0.028279998898506166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,16,128,1,fp8,fp8,0,0.02855679988861084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,1,128,1,float16,float16,0,0.02518880069255829
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,1,128,1,float16,fp8,0,0.024366399645805357
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,1,128,1,fp8,fp8,0,0.024612799286842346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,2,128,1,float16,float16,0,0.025316798686981203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,2,128,1,float16,fp8,0,0.024553599953651428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,2,128,1,fp8,fp8,0,0.02446240037679672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,4,128,1,float16,float16,0,0.025536000728607178
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,4,128,1,float16,fp8,0,0.024809600412845613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,4,128,1,fp8,fp8,0,0.0248879998922348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,8,128,1,float16,float16,0,0.027156800031661987
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,8,128,1,float16,fp8,0,0.025839999318122864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,16,8,128,1,fp8,fp8,0,0.025899198651313782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,16,128,1,float16,float16,0,0.019575999677181245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,16,128,1,float16,fp8,0,0.01897120028734207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,16,128,1,fp8,fp8,0,0.01892320066690445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,1,128,1,float16,float16,0,0.017089599370956422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,1,128,1,float16,fp8,0,0.017375999689102174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,1,128,1,fp8,fp8,0,0.017393599450588226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,2,128,1,float16,float16,0,0.01727039963006973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,2,128,1,float16,fp8,0,0.017403200268745422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,16,128,1,float16,float16,0,0.1908560037612915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,2,128,1,fp8,fp8,0,0.01781439930200577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,4,128,1,float16,float16,0,0.01759999990463257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,4,128,1,float16,fp8,0,0.017715199291706084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,2,128,1,fp8,fp8,0,0.03809759914875031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,4,128,1,fp8,fp8,0,0.017475199699401856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,8,128,1,float16,float16,0,0.018438400328159334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,16,1,128,1,float16,fp8,0,0.12528640031814575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,8,128,1,float16,fp8,0,0.01788640022277832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,16,8,128,1,fp8,fp8,0,0.01793439984321594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,4,128,1,fp8,fp8,0,0.037992000579833984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,16,128,1,float16,fp8,0,0.017987200617790224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,1,128,1,float16,float16,0,0.016889600455760954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,2,128,1,float16,float16,0,0.01698080003261566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,2,128,1,float16,fp8,0,0.0169855996966362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,2,128,1,fp8,fp8,0,0.016977599263191222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,4,128,1,float16,float16,0,0.017089599370956422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,4,128,1,float16,fp8,0,0.01709440052509308
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,4,128,1,fp8,fp8,0,0.01716320067644119
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,8,128,1,float16,float16,0,0.017467199265956877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,8,128,1,float16,fp8,0,0.017499199509620665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,8,128,1,fp8,fp8,0,0.017497600615024568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,16,1,128,1,fp8,fp8,0,0.06775360107421875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,1,128,1,float16,float16,0,0.3896591901779175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,1,128,1,float16,fp8,0,0.3889807939529419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,1,128,1,fp8,fp8,0,0.3923664093017578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,2,128,1,float16,float16,0,0.4018400192260742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,2,128,1,float16,fp8,0,0.39067840576171875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,2,128,1,fp8,fp8,0,0.39366400241851807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,4,128,1,float16,float16,0,0.4317808151245117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,16,1,128,1,fp8,fp8,0,0.0380511999130249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,4,128,1,float16,fp8,0,0.4025248050689697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,4,128,1,fp8,fp8,0,0.40584640502929686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,16,128,1,float16,float16,0,0.018089599907398224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,8,128,1,float16,float16,0,0.4975423812866211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,16,128,1,fp8,fp8,0,0.017843200266361235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,8,128,1,float16,fp8,0,0.45421757698059084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,1,128,1,float16,fp8,0,0.016998399794101716
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,16,8,128,1,fp8,fp8,0,0.4494175910949707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,16,128,1,float16,float16,0,0.32426559925079346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,1,128,1,float16,fp8,0,0.20371360778808595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,16,128,1,float16,fp8,0,0.26607840061187743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,1,128,1,float16,float16,0,0.20099360942840577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,1,128,1,fp8,fp8,0,0.20257599353790284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,2,128,1,float16,fp8,0,0.2029952049255371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,4,128,1,float16,float16,0,0.22031519412994385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,4,128,1,float16,fp8,0,0.2108720064163208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,4,128,1,fp8,fp8,0,0.20943520069122315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,8,128,1,float16,float16,0,0.2539360046386719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,8,128,1,float16,fp8,0,0.23156321048736572
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,16,128,1,float16,float16,0,0.16984959840774536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,16,128,1,float16,fp8,0,0.14028799533843994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,16,128,1,fp8,fp8,0,0.14023040533065795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,1,128,1,float16,float16,0,0.10707039833068847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,1,128,1,float16,fp8,0,0.10782079696655274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,2,128,1,float16,float16,0,0.11019999980926513
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,2,128,1,float16,fp8,0,0.10807520151138306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,2,128,1,fp8,fp8,0,0.10788320302963257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,4,128,1,float16,float16,0,0.11811840534210205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,4,128,1,float16,fp8,0,0.1112671971321106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,4,128,1,fp8,fp8,0,0.11196000576019287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,8,128,1,float16,float16,0,0.13420480489730835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,8,128,1,float16,fp8,0,0.12294880151748658
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,8,128,1,fp8,fp8,0,0.12306400537490844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,16,128,1,float16,fp8,0,0.07734079957008362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,16,128,1,float16,float16,0,0.09206879734992982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,16,128,1,fp8,fp8,0,0.07781760096549988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,1,128,1,float16,fp8,0,0.05973119735717773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,1,128,1,fp8,fp8,0,0.059934401512146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,16,1,128,1,fp8,fp8,0,0.01714559942483902
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,2,128,1,float16,fp8,0,0.06007680296897888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,2,128,1,fp8,fp8,0,0.059780800342559816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,16,128,1,fp8,fp8,0,0.268670392036438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,2,128,1,float16,float16,0,0.20545918941497804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,4,128,1,float16,fp8,0,0.06196960210800171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,4,128,1,fp8,fp8,0,0.06121439933776855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,2,128,1,fp8,fp8,0,0.20448799133300782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,8,128,1,float16,float16,0,0.07449120283126831
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,8,128,1,float16,fp8,0,0.06788319945335389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,16,128,1,float16,float16,0,0.05254560112953186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,8,128,1,fp8,fp8,0,0.06845440268516541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,16,128,1,fp8,fp8,0,0.04519839882850647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,1,128,1,float16,float16,0,0.03237760066986084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,1,128,1,float16,fp8,0,0.033144000172615054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,1,128,1,fp8,fp8,0,0.03321279883384705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,16,1,128,1,fp8,fp8,0,0.10714720487594605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,2,128,1,float16,float16,0,0.03383199870586395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,2,128,1,float16,fp8,0,0.03313600122928619
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,2,128,1,fp8,fp8,0,0.033435198664665225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,4,128,1,float16,float16,0,0.03883039951324463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,4,128,1,float16,fp8,0,0.03323360085487366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,4,128,1,fp8,fp8,0,0.03318400084972382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,8,128,1,float16,float16,0,0.042843198776245116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,8,128,1,float16,fp8,0,0.0373744010925293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,8,128,1,fp8,fp8,0,0.03707680106163025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,16,128,1,float16,float16,0,0.030140799283981324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,16,128,1,float16,fp8,0,0.02388159930706024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,16,128,1,fp8,fp8,0,0.024086399376392363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,1,128,1,float16,fp8,0,0.02122880071401596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,1,128,1,fp8,fp8,0,0.02126079946756363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,2,128,1,float16,float16,0,0.020713600516319274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,2,128,1,float16,fp8,0,0.021160000562667848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,1,128,1,float16,float16,0,0.05999839901924133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,2,128,1,fp8,fp8,0,0.021172800660133363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,4,128,1,float16,float16,0,0.02123039960861206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,4,128,1,float16,fp8,0,0.02141599953174591
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,4,128,1,fp8,fp8,0,0.021536000072956085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,8,128,1,float16,float16,0,0.022651199996471406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,8,128,1,float16,fp8,0,0.02252320051193237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,8,128,1,fp8,fp8,0,0.02237119972705841
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,16,128,1,float16,float16,0,0.016315199434757233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,16,128,1,float16,fp8,0,0.01565600037574768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,16,128,1,fp8,fp8,0,0.015643200278282164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,1,128,1,float16,float16,0,0.014023999869823455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,1,128,1,float16,fp8,0,0.01438080072402954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,1,128,1,fp8,fp8,0,0.014084799587726593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,2,128,1,float16,float16,0,0.014158399403095245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,2,128,1,float16,fp8,0,0.014155200123786927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,2,128,1,fp8,fp8,0,0.014248000085353851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,4,128,1,float16,float16,0,0.06628800034523011
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,4,128,1,float16,fp8,0,0.014295999705791474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,4,128,1,fp8,fp8,0,0.014472000300884247
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,8,128,1,float16,fp8,0,0.015024000406265258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,8,128,1,fp8,fp8,0,0.015027199685573579
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,16,128,1,float16,float16,0,0.01499519944190979
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,16,128,1,float16,fp8,0,0.015048000216484069
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,16,8,128,1,fp8,fp8,0,0.23407199382781982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,16,128,1,fp8,fp8,0,0.015017600357532501
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,1,128,1,float16,float16,0,0.013817599415779114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,1,128,1,fp8,fp8,0,0.014156800508499146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,2,128,1,float16,float16,0,0.013788799941539764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,2,128,1,float16,fp8,0,0.014313599467277527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,2,128,1,fp8,fp8,0,0.0142752006649971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,4,128,1,float16,float16,0,0.01419840008020401
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,4,128,1,float16,fp8,0,0.014238399267196656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,4,128,1,fp8,fp8,0,0.014183999598026275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,8,128,1,float16,float16,0,0.014351999759674073
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,16,1,128,1,float16,float16,0,0.020619200170040132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,8,128,1,float16,fp8,0,0.014588800072669984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,8,128,1,fp8,fp8,0,0.014372800290584565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,16,128,1,float16,fp8,0,0.014561599493026734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,16,128,1,fp8,fp8,0,0.014606399834156037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,1,128,1,float16,float16,0,0.013649600744247436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,1,128,1,float16,fp8,0,0.013920000195503235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,1,128,1,fp8,fp8,0,0.014079999923706055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,2,128,1,float16,float16,0,0.013518400490283966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,16,2,128,1,float16,float16,0,0.06198400259017944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,2,128,1,float16,fp8,0,0.013899199664592743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,2,128,1,fp8,fp8,0,0.013828800618648529
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,4,128,1,float16,fp8,0,0.01383039951324463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,4,128,1,float16,float16,0,0.014419199526309967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,16,8,128,1,float16,float16,0,0.014870400726795196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,8,128,1,float16,float16,0,0.013913600146770478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,8,128,1,fp8,fp8,0,0.014183999598026275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,16,16,128,1,float16,fp8,0,0.04425759911537171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,1,128,1,float16,float16,0,0.18893120288848878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,1,128,1,float16,fp8,0,0.1951583981513977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,16,1,128,1,float16,fp8,0,0.014192000031471252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,1,128,1,fp8,fp8,0,0.19260159730911255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,2,128,1,float16,fp8,0,0.1975648045539856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,4,128,1,float16,float16,0,0.20803680419921874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,4,128,1,float16,fp8,0,0.20811998844146729
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,4,128,1,fp8,fp8,0,0.20452160835266114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,8,128,1,float16,float16,0,0.24332959651947023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,8,128,1,float16,fp8,0,0.22896161079406738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,8,128,1,fp8,fp8,0,0.23052000999450684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,16,128,1,float16,float16,0,0.014284799993038177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,16,128,1,float16,float16,0,0.164411199092865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,16,128,1,float16,fp8,0,0.1404863953590393
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,1,128,1,float16,float16,0,0.09895359873771667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,16,128,1,fp8,fp8,0,0.14084320068359374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,1,128,1,float16,fp8,0,0.10396319627761841
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,2,128,1,float16,float16,0,0.10297119617462158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,2,128,1,float16,fp8,0,0.10478719472885131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,4,128,1,float16,float16,0,0.013537600636482239
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,2,128,1,fp8,fp8,0,0.1049615979194641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,4,128,1,float16,float16,0,0.1111456036567688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,4,128,1,float16,fp8,0,0.10898400545120239
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,4,128,1,fp8,fp8,0,0.10917279720306397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,8,128,1,float16,float16,0,0.1277583956718445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,8,128,1,float16,fp8,0,0.12079360485076904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,8,128,1,float16,fp8,0,0.014230400323867798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,8,128,1,fp8,fp8,0,0.12080320119857788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,16,128,1,float16,fp8,0,0.07541919946670532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,16,128,1,fp8,fp8,0,0.07540320158004761
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,1,128,1,float16,float16,0,0.057492798566818236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,1,128,1,float16,fp8,0,0.057051199674606326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,1,128,1,fp8,fp8,0,0.05734720230102539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,2,128,1,float16,float16,0,0.05923519730567932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,2,128,1,float16,float16,0,0.19516799449920655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,2,128,1,float16,fp8,0,0.05774400234222412
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,2,128,1,fp8,fp8,0,0.05745599865913391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,4,128,1,float16,float16,0,0.06348320245742797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,4,128,1,float16,fp8,0,0.0594864010810852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,4,128,1,fp8,fp8,0,0.059915202856063846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,16,2,128,1,fp8,fp8,0,0.19543039798736572
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,8,128,1,float16,float16,0,0.07172799706459046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,8,128,1,float16,fp8,0,0.06439840197563171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,8,128,1,fp8,fp8,0,0.06439679861068726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,16,128,1,float16,float16,0,0.04975680112838745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,16,128,1,fp8,fp8,0,0.040375998616218566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,1,128,1,float16,float16,0,0.028518399596214293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,1,128,1,float16,fp8,0,0.029145601391792297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,1,128,1,fp8,fp8,0,0.02915999889373779
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,2,128,1,float16,fp8,0,0.029241600632667543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,2,128,1,fp8,fp8,0,0.029339200258255003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,4,128,1,float16,float16,0,0.035534399747848514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,4,128,1,float16,fp8,0,0.02969920039176941
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,4,128,1,fp8,fp8,0,0.02945759892463684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,8,128,1,float16,float16,0,0.040884798765182494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,8,128,1,float16,fp8,0,0.03326399922370911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,8,128,1,fp8,fp8,0,0.03292160034179688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,16,128,1,float16,float16,0,0.027833598852157592
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,16,128,1,float16,fp8,0,0.020329600572586058
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,16,1,128,1,fp8,fp8,0,0.10332000255584717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,1,128,1,float16,float16,0,0.01756319999694824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,1,128,1,float16,fp8,0,0.01801439970731735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,2,128,1,float16,float16,0,0.017878399789333345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,2,128,1,float16,fp8,0,0.01809920072555542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,2,128,1,fp8,fp8,0,0.017998400330543517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,4,128,1,float16,float16,0,0.01826079934835434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,16,4,128,1,fp8,fp8,0,0.014019200205802917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,4,128,1,float16,fp8,0,0.018054400384426118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,4,128,1,fp8,fp8,0,0.018262399733066557
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,16,16,128,1,float16,float16,0,0.09206879734992982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,8,128,1,float16,fp8,0,0.018911999464035035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,8,128,1,fp8,fp8,0,0.01876160055398941
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,16,128,1,float16,float16,0,0.01501920074224472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,16,128,1,fp8,fp8,0,0.013961599767208099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,1,128,1,float16,float16,0,0.01282079964876175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,1,128,1,float16,fp8,0,0.01311040073633194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,1,128,1,fp8,fp8,0,0.01316159963607788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,2,128,1,float16,float16,0,0.012948800623416901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,2,128,1,float16,fp8,0,0.01297920048236847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,2,128,1,fp8,fp8,0,0.01310880035161972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,4,128,1,float16,float16,0,0.013196800649166108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,4,128,1,float16,fp8,0,0.013169600069522858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,16,128,1,float16,fp8,0,0.04022080004215241
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,16,2,128,1,float16,float16,0,0.029963201284408568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,4,128,1,fp8,fp8,0,0.013147200644016265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,8,128,1,float16,float16,0,0.013777600228786468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,8,128,1,fp8,fp8,0,0.01343040019273758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,16,128,1,float16,fp8,0,0.013079999387264252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,16,128,1,fp8,fp8,0,0.012956799566745758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,1,128,1,float16,float16,0,0.012275200337171555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,16,128,1,fp8,fp8,0,0.020227199792861937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,1,128,1,float16,fp8,0,0.012627199292182922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,2,128,1,float16,float16,0,0.012561599910259246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,1,128,1,fp8,fp8,0,0.018083199858665466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,2,128,1,float16,fp8,0,0.012771199643611907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,2,128,1,fp8,fp8,0,0.012720000743865967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,4,128,1,float16,float16,0,0.012494400143623352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,4,128,1,float16,fp8,0,0.012630400061607362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,8,128,1,float16,float16,0,0.013022400438785553
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,8,128,1,float16,fp8,0,0.012875199317932129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,8,128,1,fp8,fp8,0,0.01273760050535202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,16,8,128,1,float16,float16,0,0.019572800397872923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,16,128,1,float16,float16,0,0.012806400656700134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,16,128,1,float16,fp8,0,0.012583999335765839
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,16,128,1,fp8,fp8,0,0.01242400035262108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,1,128,1,float16,float16,0,0.012171199917793274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,1,128,1,float16,fp8,0,0.012390399724245072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,1,128,1,fp8,fp8,0,0.012347199767827988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,2,128,1,float16,float16,0,0.012052799761295318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,2,128,1,float16,fp8,0,0.012476799637079239
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,16,128,1,float16,fp8,0,0.01383039951324463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,4,128,1,float16,float16,0,0.012111999839544297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,4,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,8,128,1,float16,float16,0,0.0124719999730587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,8,128,1,fp8,fp8,0,0.012566399574279786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,16,128,1,float16,float16,0,0.01223519966006279
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,16,128,1,float16,fp8,0,0.012481600046157837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,16,128,1,fp8,fp8,0,0.012513600289821625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,1,128,1,float16,float16,0,0.012142399698495865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,1,128,1,float16,fp8,0,0.012191999703645706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,1,128,1,fp8,fp8,0,0.011999999731779098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,2,128,1,float16,float16,0,0.011935999989509583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,2,128,1,float16,fp8,0,0.012299200147390365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,2,128,1,fp8,fp8,0,0.01231520026922226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,4,128,1,float16,float16,0,0.011915200203657151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,4,128,1,float16,fp8,0,0.012201599776744843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,4,128,1,fp8,fp8,0,0.012223999947309494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,16,8,128,1,float16,fp8,0,0.013391999900341034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,8,128,1,float16,float16,0,0.011825600266456604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,8,128,1,float16,fp8,0,0.012038400024175644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,16,8,128,1,fp8,fp8,0,0.012204799801111221
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,1,128,1,float16,float16,0,0.10020960569381714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,1,128,1,fp8,fp8,0,0.012783999741077422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,1,128,1,float16,fp8,0,0.10416959524154663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,1,128,1,fp8,fp8,0,0.10454399585723877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,4,128,1,fp8,fp8,0,0.012700800597667695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,2,128,1,fp8,fp8,0,0.105948805809021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,4,128,1,float16,float16,0,0.11154719591140747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,2,128,1,fp8,fp8,0,0.012462399899959564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,4,128,1,float16,fp8,0,0.11036319732666015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,4,128,1,fp8,fp8,0,0.012460800260305405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,4,128,1,fp8,fp8,0,0.10937600135803223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,16,8,128,1,float16,fp8,0,0.012388800084590913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,8,128,1,float16,float16,0,0.13102240562438966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,8,128,1,fp8,fp8,0,0.12062400579452515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,8,128,1,float16,fp8,0,0.12015999555587768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,16,128,1,float16,fp8,0,0.08394560217857361
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,16,128,1,fp8,fp8,0,0.08334720134735107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,1,128,1,float16,float16,0,0.05803520083427429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,1,128,1,fp8,fp8,0,0.057915198802948
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,2,128,1,float16,float16,0,0.05980479717254639
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,2,128,1,fp8,fp8,0,0.058689600229263304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,2,128,1,float16,fp8,0,0.058278399705886844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,4,128,1,float16,float16,0,0.06428160071372986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,4,128,1,float16,fp8,0,0.06012319922447205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,4,128,1,fp8,fp8,0,0.060915201902389526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,16,16,128,1,float16,float16,0,0.013568000495433807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,8,128,1,float16,float16,0,0.07184640169143677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,8,128,1,float16,fp8,0,0.0652895987033844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,8,128,1,fp8,fp8,0,0.06519359946250916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,16,128,1,float16,float16,0,0.05667999982833862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,16,128,1,float16,fp8,0,0.04384799897670746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,16,128,1,fp8,fp8,0,0.04385600090026855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,1,128,1,float16,float16,0,0.028566399216651918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,1,128,1,float16,fp8,0,0.02940639853477478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,1,128,1,fp8,fp8,0,0.029249599575996398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,2,128,1,float16,float16,0,0.030268800258636475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,2,128,1,float16,fp8,0,0.02948479950428009
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,2,128,1,fp8,fp8,0,0.029372799396514892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,4,128,1,float16,float16,0,0.035848000645637514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,4,128,1,float16,fp8,0,0.029844799637794496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,4,128,1,fp8,fp8,0,0.029529601335525513
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,8,128,1,float16,float16,0,0.04051199853420258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,8,128,1,float16,fp8,0,0.033108800649642944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,16,8,128,1,fp8,fp8,0,0.032716798782348636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,16,128,1,float16,float16,0,0.029049599170684816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,2,128,1,float16,float16,0,0.1034816026687622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,16,128,1,fp8,fp8,0,0.021726399660110474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,1,128,1,float16,float16,0,0.017859199643135072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,16,2,128,1,float16,fp8,0,0.10567679405212402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,2,128,1,float16,float16,0,0.018198400735855103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,2,128,1,fp8,fp8,0,0.018318399786949158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,2,128,1,float16,fp8,0,0.018512000143527985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,4,128,1,float16,float16,0,0.018489600718021394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,4,128,1,float16,fp8,0,0.018702399730682374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,4,128,1,fp8,fp8,0,0.01855359971523285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,8,128,1,float16,fp8,0,0.019092799723148347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,8,128,1,fp8,fp8,0,0.019113600254058838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,16,128,1,float16,float16,0,0.01562879979610443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,16,128,1,float16,fp8,0,0.014110399782657624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,16,128,1,fp8,fp8,0,0.014243200421333313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,1,128,1,float16,float16,0,0.012643200159072877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,16,128,1,float16,float16,0,0.10449440479278564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,1,128,1,float16,fp8,0,0.012827199697494508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,16,1,128,1,float16,fp8,0,0.05810239911079407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,2,128,1,float16,fp8,0,0.013180799782276154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,2,128,1,fp8,fp8,0,0.012988799810409546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,4,128,1,float16,float16,0,0.012972800433635712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,4,128,1,float16,fp8,0,0.01316159963607788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,4,128,1,fp8,fp8,0,0.013196800649166108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,8,128,1,float16,float16,0,0.013680000603199006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,8,128,1,float16,fp8,0,0.013385599851608277
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,8,128,1,fp8,fp8,0,0.013366399705410004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,16,128,1,float16,float16,0,0.011966399848461151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,16,128,1,float16,fp8,0,0.011062400043010711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,16,128,1,fp8,fp8,0,0.011027199774980545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,1,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,1,128,1,float16,fp8,0,0.012822400033473968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,1,128,1,fp8,fp8,0,0.012783999741077422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,2,128,1,float16,float16,0,0.01268640011548996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,2,128,1,float16,fp8,0,0.012852799892425538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,2,128,1,fp8,fp8,0,0.012806400656700134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,1,128,1,float16,fp8,0,0.018539200723171233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,4,128,1,float16,fp8,0,0.012859199941158295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,4,128,1,fp8,fp8,0,0.012604799866676331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,8,128,1,float16,float16,0,0.01302880048751831
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,8,128,1,float16,float16,0,0.02001280039548874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,8,128,1,fp8,fp8,0,0.013048000633716583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,16,128,1,float16,float16,0,0.011049599945545196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,16,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,1,128,1,float16,float16,0,0.012174399942159653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,1,128,1,float16,fp8,0,0.012479999661445617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,1,128,1,fp8,fp8,0,0.012247999757528305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,2,128,1,float16,float16,0,0.012166400253772736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,2,128,1,float16,fp8,0,0.012652799487113953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,1,128,1,fp8,fp8,0,0.012878400087356568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,2,128,1,fp8,fp8,0,0.012579199671745301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,4,128,1,float16,float16,0,0.0119439996778965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,4,128,1,float16,fp8,0,0.012151999771595002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,4,128,1,fp8,fp8,0,0.012211199849843979
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,8,128,1,float16,float16,0,0.012171199917793274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,8,128,1,float16,fp8,0,0.01268800050020218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,8,128,1,fp8,fp8,0,0.012390399724245072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,16,128,1,float16,float16,0,0.010592000186443329
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,16,128,1,float16,fp8,0,0.010113599896430969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,16,128,1,fp8,fp8,0,0.009976000338792802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,1,128,1,float16,float16,0,0.011582399904727935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,1,128,1,float16,fp8,0,0.011838400363922119
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,1,128,1,fp8,fp8,0,0.011875200271606445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,2,128,1,float16,float16,0,0.011691199988126755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,2,128,1,float16,fp8,0,0.011959999799728394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,2,128,1,fp8,fp8,0,0.012004800140857697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,4,128,1,float16,float16,0,0.011822400242090225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,16,128,1,float16,fp8,0,0.021668800711631776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,4,128,1,float16,fp8,0,0.012289600074291229
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,4,128,1,fp8,fp8,0,0.012326399981975555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,16,1,128,1,fp8,fp8,0,0.018542400002479552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,8,128,1,float16,fp8,0,0.012137600034475327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,8,128,1,fp8,fp8,0,0.012142399698495865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,16,128,1,float16,float16,0,0.010179200023412705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,16,128,1,float16,fp8,0,0.009587199985980987
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,16,128,1,fp8,fp8,0,0.009550400078296661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,1,128,1,float16,fp8,0,0.011819200217723846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,1,128,1,fp8,fp8,0,0.011931200325489045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,8,128,1,float16,fp8,0,0.012777599692344665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,2,128,1,float16,float16,0,0.011716800183057785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,16,16,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,2,128,1,float16,fp8,0,0.012030400335788727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,2,128,1,fp8,fp8,0,0.011924800276756287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,4,128,1,float16,float16,0,0.011619199812412263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,4,128,1,float16,fp8,0,0.011975999921560287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,8,128,1,float16,float16,0,0.011640000343322753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,8,128,1,float16,fp8,0,0.011900799721479416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,8,128,1,fp8,fp8,0,0.01202080026268959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,1,128,1,float16,float16,0,0.058182400465011594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,1,128,1,float16,fp8,0,0.05900959968566895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,16,2,128,1,float16,float16,0,0.01284320056438446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,2,128,1,float16,float16,0,0.060057598352432254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,2,128,1,float16,fp8,0,0.05924640297889709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,2,128,1,fp8,fp8,0,0.05940160155296326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,4,128,1,float16,float16,0,0.06486880183219909
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,4,128,1,fp8,fp8,0,0.061433601379394534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,8,128,1,float16,float16,0,0.08562560081481933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,8,128,1,float16,fp8,0,0.07303199768066407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,8,128,1,fp8,fp8,0,0.07275999784469604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,16,128,1,float16,float16,0,0.07700799703598023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,16,128,1,float16,fp8,0,0.06739360094070435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,16,128,1,fp8,fp8,0,0.06732159852981567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,1,128,1,float16,float16,0,0.02903839945793152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,1,128,1,float16,fp8,0,0.029960000514984132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,16,8,128,1,float16,float16,0,0.011884800344705581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,16,4,128,1,float16,float16,0,0.012534399330615998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,2,128,1,float16,float16,0,0.03072479963302612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,2,128,1,float16,fp8,0,0.030225598812103273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,2,128,1,fp8,fp8,0,0.02998400032520294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,1,128,1,float16,float16,0,0.011468800157308579
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,4,128,1,float16,float16,0,0.03659839928150177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,4,128,1,float16,fp8,0,0.030236798524856567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,8,128,1,float16,float16,0,0.046449598670005796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,4,128,1,fp8,fp8,0,0.030214399099349976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,8,128,1,float16,fp8,0,0.03782399892807007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,8,128,1,fp8,fp8,0,0.03691200017929077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,16,128,1,float16,float16,0,0.04147039949893951
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,16,128,1,float16,fp8,0,0.033817601203918454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,16,128,1,fp8,fp8,0,0.034353598952293396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,1,128,1,float16,float16,0,0.01797119975090027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,16,4,128,1,fp8,fp8,0,0.012043199688196182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,2,128,1,float16,float16,0,0.018326400220394133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,2,128,1,float16,fp8,0,0.01903039962053299
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,4,128,1,float16,float16,0,0.018681600689888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,4,128,1,float16,fp8,0,0.019057600200176238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,1,128,1,fp8,fp8,0,0.05862879753112793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,4,128,1,fp8,fp8,0,0.018955199420452117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,8,128,1,float16,float16,0,0.02268480062484741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,8,128,1,float16,fp8,0,0.020824000239372253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,8,128,1,fp8,fp8,0,0.021403199434280394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,16,4,128,1,float16,fp8,0,0.061419200897216794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,16,128,1,float16,fp8,0,0.020417599380016326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,1,128,1,float16,float16,0,0.013409599661827087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,16,128,1,fp8,fp8,0,0.02019200026988983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,1,128,1,float16,fp8,0,0.013691200315952301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,1,128,1,fp8,fp8,0,0.01358720064163208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,2,128,1,float16,float16,0,0.01366720050573349
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,2,128,1,float16,fp8,0,0.013739199936389923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,4,128,1,float16,float16,0,0.013847999274730682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,4,128,1,float16,fp8,0,0.013792000710964203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,4,128,1,fp8,fp8,0,0.01374559998512268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,8,128,1,float16,float16,0,0.015278400480747223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,8,128,1,float16,fp8,0,0.014150400459766389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,8,128,1,fp8,fp8,0,0.014203199744224548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,16,128,1,float16,float16,0,0.014457599818706512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,16,128,1,float16,fp8,0,0.013449600338935852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,16,128,1,fp8,fp8,0,0.013788799941539764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,1,128,1,float16,float16,0,0.012107200175523757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,1,128,1,float16,fp8,0,0.012763200700283051
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,1,128,1,fp8,fp8,0,0.012441600114107132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,2,128,1,float16,float16,0,0.012361600250005721
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,2,128,1,float16,fp8,0,0.0126351997256279
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,2,128,1,fp8,fp8,0,0.01273760050535202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,4,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,16,1,128,1,fp8,fp8,0,0.03012320101261139
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,4,128,1,fp8,fp8,0,0.012539200484752655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,8,128,1,float16,float16,0,0.010967999696731567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,8,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,8,128,1,fp8,fp8,0,0.010595200210809707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,16,128,1,float16,float16,0,0.011214400082826615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,16,128,1,float16,fp8,0,0.010713600367307664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,16,128,1,fp8,fp8,0,0.010756800323724747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,1,128,1,float16,fp8,0,0.019289599359035493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,1,128,1,fp8,fp8,0,0.019230400025844575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,16,2,128,1,fp8,fp8,0,0.018952000141143798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,2,128,1,float16,float16,0,0.01241919994354248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,2,128,1,float16,fp8,0,0.012654399871826172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,2,128,1,fp8,fp8,0,0.01265760064125061
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,4,128,1,float16,float16,0,0.0121568001806736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,4,128,1,float16,fp8,0,0.012350399792194367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,16,128,1,float16,float16,0,0.02152640074491501
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,4,128,1,fp8,fp8,0,0.012520000338554382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,8,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,8,128,1,fp8,fp8,0,0.010187199711799622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,16,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,16,128,1,float16,fp8,0,0.01029760017991066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,16,128,1,fp8,fp8,0,0.010273600369691849
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,16,2,128,1,fp8,fp8,0,0.013523200154304504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,1,128,1,float16,float16,0,0.0118367999792099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,1,128,1,float16,fp8,0,0.012179200351238251
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,1,128,1,fp8,fp8,0,0.01210239976644516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,2,128,1,float16,float16,0,0.01199679970741272
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,2,128,1,float16,fp8,0,0.011964800208806992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,2,128,1,fp8,fp8,0,0.012062399834394454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,4,128,1,float16,float16,0,0.011720000207424164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,4,128,1,float16,fp8,0,0.012299200147390365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,4,128,1,fp8,fp8,0,0.012067200243473053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,8,128,1,float16,float16,0,0.010168000310659408
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,8,128,1,float16,fp8,0,0.010068800300359726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,16,8,128,1,fp8,fp8,0,0.009974399954080582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,16,128,1,float16,float16,0,0.010308799892663955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,16,128,1,fp8,fp8,0,0.010179200023412705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,1,128,1,float16,float16,0,0.011823999881744384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,1,128,1,float16,fp8,0,0.012076800316572189
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,16,4,128,1,float16,fp8,0,0.012452799826860428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,1,128,1,fp8,fp8,0,0.012113600224256515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,2,128,1,float16,float16,0,0.011883199959993363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,2,128,1,float16,fp8,0,0.01186719983816147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,2,128,1,fp8,fp8,0,0.012088000029325485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,4,128,1,float16,float16,0,0.011499200016260147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,4,128,1,float16,fp8,0,0.011998400092124939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,4,128,1,fp8,fp8,0,0.011876799911260606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,8,128,1,float16,float16,0,0.010063999891281128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,8,128,1,float16,fp8,0,0.009494400024414063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,8,128,1,fp8,fp8,0,0.009729599952697754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,16,128,1,float16,float16,0,0.010063999891281128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,16,128,1,float16,fp8,0,0.009668800234794616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,16,128,1,fp8,fp8,0,0.009576000273227692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,1,128,1,float16,float16,0,0.011582399904727935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,1,128,1,float16,fp8,0,0.011846400052309036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,1,128,1,fp8,fp8,0,0.012027200311422348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,1,128,1,float16,float16,0,0.012401600182056428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,2,128,1,fp8,fp8,0,0.012001600116491318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,4,128,1,float16,float16,0,0.011499200016260147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,4,128,1,float16,fp8,0,0.011926399916410447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,4,128,1,fp8,fp8,0,0.011550399661064147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,8,128,1,float16,float16,0,0.010041599720716476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,8,128,1,float16,fp8,0,0.009324800223112106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,8,128,1,fp8,fp8,0,0.00987199991941452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,1,128,1,float16,float16,0,0.03065440058708191
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,1,128,1,float16,fp8,0,0.03261919915676117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,1,128,1,fp8,fp8,0,0.03181119859218597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,2,128,1,float16,float16,0,0.032150399684906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,2,128,1,float16,fp8,0,0.03196640014648437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,2,128,1,fp8,fp8,0,0.03233599960803986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,8,128,1,float16,float16,0,0.010686399787664414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,4,128,1,float16,float16,0,0.04317759871482849
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,4,128,1,float16,fp8,0,0.03684639930725098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,4,128,1,fp8,fp8,0,0.036668801307678224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,8,128,1,float16,float16,0,0.06875680088996887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,8,128,1,float16,fp8,0,0.061812800168991086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,16,8,128,1,fp8,fp8,0,0.06151999831199646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,16,128,1,float16,fp8,0,0.05663679838180542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,1,128,1,float16,float16,0,0.01934400051832199
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,1,128,1,float16,fp8,0,0.02045599967241287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,1,128,1,fp8,fp8,0,0.020337599515914916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,2,128,1,float16,float16,0,0.019843199849128725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,2,128,1,float16,fp8,0,0.02034880071878433
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,16,16,128,1,float16,fp8,0,0.009822399914264679
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,4,128,1,float16,float16,0,0.02263839989900589
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,4,128,1,float16,fp8,0,0.021862399578094483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,4,128,1,fp8,fp8,0,0.02195200026035309
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,8,128,1,float16,float16,0,0.03469600081443787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,8,128,1,float16,fp8,0,0.03319680094718933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,8,128,1,fp8,fp8,0,0.033225598931312564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,16,128,1,float16,float16,0,0.03380959928035736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,16,128,1,float16,fp8,0,0.032153600454330446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,16,128,1,fp8,fp8,0,0.03203360140323639
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,2,128,1,float16,float16,0,0.011510399729013443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,1,128,1,float16,float16,0,0.013900800049304963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,1,128,1,float16,fp8,0,0.014265599846839904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,1,128,1,fp8,fp8,0,0.012707200646400452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,1,128,1,fp8,fp8,0,0.014201599359512328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,2,128,1,float16,float16,0,0.0141184002161026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,2,128,1,float16,fp8,0,0.014248000085353851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,2,128,1,fp8,fp8,0,0.014267200231552124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,4,128,1,float16,float16,0,0.015238399803638458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,4,128,1,float16,fp8,0,0.014526399970054626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,4,128,1,fp8,fp8,0,0.014587199687957764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,8,128,1,float16,float16,0,0.021188800036907197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,8,128,1,float16,fp8,0,0.020494399964809416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,16,8,128,1,fp8,fp8,0,0.020401600003242492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,16,128,1,float16,float16,0,0.0204815998673439
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,16,128,1,float16,fp8,0,0.01934559941291809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,16,128,1,fp8,fp8,0,0.019470399618148802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,1,128,1,float16,float16,0,0.012761600315570831
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,1,128,1,float16,fp8,0,0.013443200290203095
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,1,128,1,fp8,fp8,0,0.01297920048236847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,2,128,1,float16,float16,0,0.012969599664211273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,2,128,1,float16,fp8,0,0.013009600341320038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,2,128,1,fp8,fp8,0,0.013419200479984284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,4,128,1,float16,float16,0,0.011299200356006622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,4,128,1,float16,fp8,0,0.01109279990196228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,16,128,1,fp8,fp8,0,0.05613279938697815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,8,128,1,float16,float16,0,0.014347200095653535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,8,128,1,float16,fp8,0,0.013593600690364837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,8,128,1,fp8,fp8,0,0.013841600716114044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,16,128,1,float16,float16,0,0.013790400326251983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,16,128,1,fp8,fp8,0,0.013167999684810638
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,1,128,1,float16,float16,0,0.011964800208806992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,1,128,1,float16,fp8,0,0.012547199428081513
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,1,128,1,fp8,fp8,0,0.012332800030708312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,2,128,1,float16,float16,0,0.012038400024175644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,2,128,1,float16,fp8,0,0.012620800733566284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,2,128,1,fp8,fp8,0,0.012742400169372559
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,4,128,1,float16,float16,0,0.010768000036478043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,4,128,1,float16,fp8,0,0.010228800028562546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,4,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,8,128,1,float16,float16,0,0.011062400043010711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,8,128,1,float16,fp8,0,0.010711999982595444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,8,128,1,fp8,fp8,0,0.010707200318574906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,16,128,1,float16,float16,0,0.010937599837779999
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,16,128,1,float16,fp8,0,0.010609599947929382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,16,2,128,1,float16,fp8,0,0.011912000179290772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,16,128,1,fp8,fp8,0,0.010611200332641601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,16,1,128,1,float16,fp8,0,0.012372799962759019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,1,128,1,float16,float16,0,0.011737599968910217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,2,128,1,float16,float16,0,0.012035199999809265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,2,128,1,float16,fp8,0,0.012127999961376191
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,2,128,1,fp8,fp8,0,0.012177599966526032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,4,128,1,float16,float16,0,0.010305599868297577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,4,128,1,float16,fp8,0,0.009974399954080582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,4,128,1,fp8,fp8,0,0.010289599746465683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,8,128,1,float16,float16,0,0.01064639985561371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,8,128,1,float16,fp8,0,0.010308799892663955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,8,128,1,fp8,fp8,0,0.010283199697732925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,16,128,1,float16,float16,0,0.01032480001449585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,16,128,1,float16,float16,0,0.06366400122642517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,16,128,1,float16,fp8,0,0.009803199768066406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,16,128,1,fp8,fp8,0,0.009995199739933014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,16,4,128,1,fp8,fp8,0,0.010990399867296219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,1,128,1,float16,float16,0,0.011510399729013443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,1,128,1,float16,fp8,0,0.011875200271606445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,2,128,1,float16,float16,0,0.011684799939393998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,1,128,1,fp8,fp8,0,0.01183359995484352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,2,128,1,float16,fp8,0,0.011856000125408172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,2,128,1,fp8,fp8,0,0.011990399658679962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,4,128,1,float16,float16,0,0.009860800206661224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,4,128,1,float16,fp8,0,0.00965920016169548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,8,128,1,float16,float16,0,0.00987040027976036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,8,128,1,float16,fp8,0,0.00987360030412674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,8,128,1,fp8,fp8,0,0.00984639972448349
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,16,128,1,float16,float16,0,0.010209599882364273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,16,2,128,1,fp8,fp8,0,0.020207999646663664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,16,128,1,fp8,fp8,0,0.009913600236177444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,1,128,1,float16,fp8,0,0.011878400295972823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,1,128,1,fp8,fp8,0,0.011744000017642975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,2,128,1,float16,float16,0,0.011398400366306304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,2,128,1,float16,fp8,0,0.011767999827861786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,2,128,1,fp8,fp8,0,0.011803200095891952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,4,128,1,float16,float16,0,0.009721600264310837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,4,128,1,float16,fp8,0,0.00952960029244423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,4,128,1,fp8,fp8,0,0.009513600170612336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,8,128,1,float16,float16,0,0.00986879989504814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,8,128,1,float16,fp8,0,0.009679999947547913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,8,128,1,fp8,fp8,0,0.009700799733400345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,16,128,1,float16,float16,0,0.010232000052928925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,16,128,1,float16,fp8,0,0.009388799965381622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,16,128,1,fp8,fp8,0,0.009625600278377533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,1,128,1,float16,float16,0,0.011268799751996994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,1,128,1,float16,fp8,0,0.011713600158691407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,1,128,1,fp8,fp8,0,0.011588799953460693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,1,128,1,float16,fp8,0,0.012100800126791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,2,128,1,float16,fp8,0,0.011664000153541566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,16,1,128,1,fp8,fp8,0,0.012012799829244613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,4,128,1,float16,float16,0,0.00957920029759407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,4,128,1,float16,fp8,0,0.009223999828100205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,8,128,1,float16,float16,0,0.009603200107812881
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,8,128,1,float16,fp8,0,0.009398400038480758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,8,128,1,fp8,fp8,0,0.009464000165462495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,1,128,1,float16,float16,0,0.017795200645923614
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,16,16,128,1,float16,fp8,0,0.013337600231170654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,16,4,128,1,fp8,fp8,0,0.009654399752616883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,1,128,1,float16,fp8,0,0.018083199858665466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,1,128,1,fp8,fp8,0,0.017843200266361235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,16,128,1,float16,fp8,0,0.009854400157928466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,2,128,1,float16,float16,0,0.023681600391864777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,2,128,1,fp8,fp8,0,0.023446400463581086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,16,1,128,1,float16,float16,0,0.011475200206041336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,4,128,1,float16,fp8,0,0.034255999326705935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,4,128,1,fp8,fp8,0,0.034360000491142274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,8,128,1,float16,float16,0,0.05573760271072388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,8,128,1,float16,fp8,0,0.05579040050506592
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,16,128,1,float16,float16,0,0.05371999740600586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,16,128,1,float16,fp8,0,0.05379040241241455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,16,128,1,fp8,fp8,0,0.053825598955154416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,1,128,1,float16,float16,0,0.012966400384902954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,1,128,1,float16,fp8,0,0.012449599802494049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,1,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,2,128,1,float16,float16,0,0.011494400352239609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,2,128,1,float16,float16,0,0.015345600247383118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,2,128,1,float16,fp8,0,0.015267199277877808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,2,128,1,fp8,fp8,0,0.011822400242090225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,2,128,1,fp8,fp8,0,0.0154448002576828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,4,128,1,float16,float16,0,0.021070399880409242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,16,4,128,1,fp8,fp8,0,0.009372799843549728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,4,128,1,float16,fp8,0,0.02091200053691864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,8,128,1,float16,float16,0,0.032201600074768064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,8,128,1,float16,fp8,0,0.031727999448776245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,8,128,1,fp8,fp8,0,0.03166080117225647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,16,128,1,float16,fp8,0,0.030972799658775328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,16,128,1,fp8,fp8,0,0.03091840147972107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,1,128,1,float16,float16,0,0.01117440015077591
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,1,128,1,float16,fp8,0,0.01168000027537346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,1,128,1,fp8,fp8,0,0.011284799873828888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,2,128,1,float16,float16,0,0.011737599968910217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,2,128,1,float16,fp8,0,0.011552000045776367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,2,128,1,fp8,fp8,0,0.01167680025100708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,4,128,1,float16,float16,0,0.01408800035715103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,4,128,1,float16,fp8,0,0.014267200231552124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,4,128,1,fp8,fp8,0,0.01401440054178238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,8,128,1,float16,float16,0,0.019894400238990785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,8,128,1,float16,fp8,0,0.019515199959278105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,8,128,1,fp8,fp8,0,0.01968639940023422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,16,128,1,float16,float16,0,0.019219200313091277
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,16,128,1,float16,fp8,0,0.018795199692249298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,16,128,1,fp8,fp8,0,0.018900799751281738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,1,128,1,float16,float16,0,0.01067039966583252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,1,128,1,float16,fp8,0,0.010651200264692306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,1,128,1,fp8,fp8,0,0.01064639985561371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,2,128,1,float16,fp8,0,0.010755199939012527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,2,128,1,fp8,fp8,0,0.01074879989027977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,2,128,1,float16,fp8,0,0.023396800458431243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,4,128,1,float16,float16,0,0.010819199681282043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,4,128,1,float16,float16,0,0.03487200140953064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,4,128,1,float16,fp8,0,0.010619200021028518
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,4,128,1,fp8,fp8,0,0.010532800108194351
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,8,128,1,float16,fp8,0,0.013255999982357025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,8,128,1,fp8,fp8,0,0.012982399761676788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,16,128,1,float16,fp8,0,0.012703999876976013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,16,8,128,1,fp8,fp8,0,0.05594720244407654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,1,128,1,float16,float16,0,0.010027199983596802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,1,128,1,float16,fp8,0,0.009905599802732468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,1,128,1,fp8,fp8,0,0.010047999769449234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,2,128,1,float16,float16,0,0.010171200335025787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,2,128,1,float16,fp8,0,0.010092800110578537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,2,128,1,fp8,fp8,0,0.010147199779748917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,4,128,1,float16,float16,0,0.01011200025677681
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,4,128,1,float16,fp8,0,0.010047999769449234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,4,128,1,fp8,fp8,0,0.009961599856615067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,8,128,1,float16,float16,0,0.010175999999046326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,8,128,1,float16,fp8,0,0.010182400047779084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,8,128,1,fp8,fp8,0,0.010094399750232696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,16,4,128,1,fp8,fp8,0,0.02112320065498352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,16,128,1,float16,float16,0,0.0106175996363163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,16,128,1,fp8,fp8,0,0.010174400359392165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,1,128,1,float16,float16,0,0.009784000366926194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,1,128,1,float16,fp8,0,0.009748800098896027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,2,128,1,float16,float16,0,0.009796799719333648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,2,128,1,float16,fp8,0,0.009431999921798707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,2,128,1,fp8,fp8,0,0.009428799897432328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,4,128,1,float16,float16,0,0.009830400347709656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,4,128,1,float16,fp8,0,0.009831999987363815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,4,128,1,fp8,fp8,0,0.009841600060462951
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,8,128,1,float16,float16,0,0.010148800164461135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,8,128,1,float16,fp8,0,0.01010880023241043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,2,128,1,float16,float16,0,0.010673599690198899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,16,128,1,float16,float16,0,0.010123199969530105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,16,128,1,float16,fp8,0,0.009831999987363815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,16,128,1,fp8,fp8,0,0.009824000298976898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,1,128,1,float16,float16,0,0.009406399726867676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,1,128,1,float16,fp8,0,0.009348800033330917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,16,8,128,1,float16,float16,0,0.0133200004696846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,1,128,1,fp8,fp8,0,0.009328000247478485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,16,128,1,float16,float16,0,0.01297920048236847
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,2,128,1,float16,fp8,0,0.009364800155162811
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,2,128,1,fp8,fp8,0,0.009505599737167358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,16,16,128,1,fp8,fp8,0,0.01287200003862381
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,4,128,1,float16,fp8,0,0.009759999811649323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,4,128,1,fp8,fp8,0,0.009743999689817429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,8,128,1,float16,fp8,0,0.009803199768066406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,16,128,1,float16,fp8,0,0.009995199739933014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,16,16,128,1,float16,float16,0,0.03136639893054962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,8,128,1,fp8,fp8,0,0.009804800152778625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,16,128,1,float16,float16,0,0.010092800110578537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,16,128,1,float16,fp8,0,0.009828799962997436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,1,128,1,fp8,fp8,0,0.009750399738550186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,1,128,1,float16,float16,0,0.00968639999628067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,1,128,1,float16,fp8,0,0.009356799721717834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,2,128,1,float16,float16,0,0.009319999814033508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,2,128,1,float16,fp8,0,0.009118399769067764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,2,128,1,fp8,fp8,0,0.009121599793434142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,4,128,1,float16,float16,0,0.00929120033979416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,16,8,128,1,fp8,fp8,0,0.010127999633550645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,4,128,1,fp8,fp8,0,0.009398400038480758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,8,128,1,float16,float16,0,0.009537599980831146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,8,128,1,float16,fp8,0,0.009390400350093841
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,8,128,1,fp8,fp8,0,0.009372799843549728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,16,128,1,float16,float16,0,0.009718400239944459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,16,128,1,float16,fp8,0,0.009390400350093841
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,2,128,1,float16,float16,0,0.009547200053930283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,4,128,1,float16,float16,0,0.00944959968328476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,1,128,1,float16,float16,0,0.00971359983086586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,1,128,1,float16,fp8,0,0.009332799911499023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,2,128,1,float16,float16,0,0.009576000273227692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,2,128,1,float16,fp8,0,0.00942239984869957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,2,128,1,fp8,fp8,0,0.009249600023031235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,4,128,1,float16,float16,0,0.009319999814033508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,4,128,1,float16,fp8,0,0.009275200217962265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,4,128,1,fp8,fp8,0,0.009337600320577621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,8,128,1,float16,float16,0,0.009750399738550186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,8,128,1,float16,fp8,0,0.00963039994239807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,8,128,1,fp8,fp8,0,0.00960479974746704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,16,128,1,fp8,fp8,0,0.00958240032196045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,1,128,1,fp8,fp8,0,0.0094480000436306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,16,4,128,1,float16,fp8,0,0.009390400350093841
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,16,128,1,fp8,fp8,0,0.00942080020904541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,16,8,128,1,float16,float16,0,0.00992799997329712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,16,1,128,1,fp8,fp8,0,0.009324800223112106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,12,1,128,1,float16,fp8,0,8.747509002685547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,12,1,128,1,fp8,fp8,0,8.977180480957031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,12,1,128,1,float16,float16,0,11.2928466796875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,12,2,128,1,float16,fp8,0,8.91872787475586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,12,2,128,1,fp8,fp8,0,9.044032287597656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,12,2,128,1,float16,float16,0,11.30722885131836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,12,4,128,1,float16,float16,0,11.56622543334961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,12,4,128,1,float16,fp8,0,9.017620849609376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,12,128,1,float16,float16,0,5.859257507324219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,1,128,1,float16,float16,0,5.665817642211914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,1,128,1,float16,fp8,0,4.493939208984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,12,4,128,1,fp8,fp8,0,8.896555328369141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,1,128,1,fp8,fp8,0,4.524628829956055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,2,128,1,float16,fp8,0,4.695912170410156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,2,128,1,float16,float16,0,5.5443473815917965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,2,128,1,fp8,fp8,0,4.609262466430664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,4,128,1,float16,float16,0,5.673294448852539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,4,128,1,float16,fp8,0,4.4375263214111325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,12,128,1,float16,float16,0,2.729622459411621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,4,128,1,fp8,fp8,0,4.457271957397461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,1,128,1,float16,fp8,0,2.1911712646484376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,1,128,1,float16,float16,0,2.8107200622558595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,1,128,1,fp8,fp8,0,2.419209671020508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,2,128,1,float16,float16,0,2.678303909301758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,2,128,1,float16,fp8,0,2.18984317779541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,2,128,1,fp8,fp8,0,2.344179153442383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,4,128,1,float16,float16,0,2.69149112701416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,4,128,1,float16,fp8,0,2.346139144897461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,12,128,1,float16,float16,0,1.3264944076538085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,4,128,1,fp8,fp8,0,2.2124607086181642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,1,128,1,float16,fp8,0,1.1022512435913085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,1,128,1,float16,float16,0,1.3951040267944337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,1,128,1,fp8,fp8,0,1.0950655937194824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,2,128,1,float16,float16,0,1.4029264450073242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,2,128,1,float16,fp8,0,1.1115360260009766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,2,128,1,fp8,fp8,0,1.2284048080444336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,4,128,1,float16,float16,0,1.3816864013671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,4,128,1,float16,fp8,0,1.1685919761657715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,4,128,1,fp8,fp8,0,1.3237775802612304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,12,1,128,1,float16,fp8,0,5.098960113525391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,12,1,128,1,float16,float16,0,6.737397003173828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,12,1,128,1,fp8,fp8,0,4.996926498413086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,12,2,128,1,float16,float16,0,6.558106994628906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,12,2,128,1,float16,fp8,0,5.077848052978515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,12,2,128,1,fp8,fp8,0,5.21833610534668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,12,4,128,1,float16,float16,0,6.690245056152344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,12,4,128,1,float16,fp8,0,5.138313674926758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,12,128,1,float16,float16,0,3.3558639526367187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,12,4,128,1,fp8,fp8,0,5.15380630493164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,12,128,1,fp8,fp8,0,2.2039264678955077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,12,128,1,fp8,fp8,0,2.5683183670043945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,12,128,1,float16,fp8,0,4.487435150146484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,12,128,1,fp8,fp8,0,1.1105680465698242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,12,12,128,1,fp8,fp8,0,4.479337692260742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,12,12,128,1,float16,fp8,0,2.212620735168457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,1,128,1,float16,float16,0,2.952836799621582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,12,128,1,float16,fp8,0,2.5713760375976564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,1,128,1,float16,fp8,0,2.477089691162109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,1,128,1,fp8,fp8,0,2.488991928100586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,2,128,1,float16,float16,0,2.937900733947754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,2,128,1,float16,fp8,0,2.506809616088867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,12,12,128,1,float16,fp8,0,1.1110015869140626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,4,128,1,float16,fp8,0,2.5544511795043947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,12,128,1,float16,float16,0,1.5491151809692383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,4,128,1,float16,float16,0,3.275592041015625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,4,128,1,fp8,fp8,0,2.504057693481445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,12,128,1,fp8,fp8,0,1.3118127822875976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,12,128,1,float16,fp8,0,1.489675235748291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,1,128,1,float16,fp8,0,1.330947208404541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,1,128,1,float16,float16,0,1.437063980102539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,1,128,1,fp8,fp8,0,1.4076623916625977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,2,128,1,float16,float16,0,1.4296480178833009
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,2,128,1,float16,fp8,0,1.2558655738830566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,2,128,1,fp8,fp8,0,1.3121871948242188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,12,128,1,float16,fp8,0,0.6647984027862549
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,12,128,1,float16,float16,0,0.8808079719543457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,4,128,1,float16,fp8,0,1.3189519882202148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,4,128,1,float16,float16,0,1.4960528373718263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,12,128,1,fp8,fp8,0,0.6807040214538574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,12,4,128,1,fp8,fp8,0,1.3061311721801758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,1,128,1,float16,float16,0,0.7122320175170899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,1,128,1,float16,fp8,0,0.677836799621582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,1,128,1,fp8,fp8,0,0.6548223972320557
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,2,128,1,float16,fp8,0,0.6359392166137695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,2,128,1,float16,float16,0,0.7872960090637207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,2,128,1,fp8,fp8,0,0.6464655876159668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,4,128,1,float16,float16,0,0.7309648036956787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,4,128,1,fp8,fp8,0,0.656276798248291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,12,2,128,1,fp8,fp8,0,2.5207456588745116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,12,1,128,1,fp8,fp8,0,3.5450958251953124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,12,1,128,1,float16,fp8,0,3.6106929779052734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,12,2,128,1,float16,fp8,0,3.5335361480712892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,12,4,128,1,float16,fp8,0,0.660591983795166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,12,2,128,1,float16,float16,0,4.446308898925781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,12,2,128,1,fp8,fp8,0,3.4767105102539064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,12,128,1,float16,fp8,0,1.8343055725097657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,12,128,1,float16,float16,0,2.2653871536254884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,12,128,1,fp8,fp8,0,1.9237247467041017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,12,4,128,1,fp8,fp8,0,3.5468639373779296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,12,4,128,1,float16,fp8,0,3.6097553253173826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,12,4,128,1,float16,float16,0,4.426078414916992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,12,1,128,1,float16,float16,0,4.496335983276367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,1,128,1,float16,fp8,0,1.739044761657715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,1,128,1,float16,float16,0,2.0217567443847657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,1,128,1,fp8,fp8,0,1.906398391723633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,2,128,1,float16,float16,0,2.0652048110961916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,2,128,1,float16,fp8,0,1.7718927383422851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,2,128,1,fp8,fp8,0,1.7889743804931642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,12,128,1,float16,fp8,0,0.952780818939209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,4,128,1,float16,fp8,0,1.760526466369629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,12,128,1,float16,float16,0,1.2324735641479492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,4,128,1,fp8,fp8,0,1.7675472259521485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,12,4,128,1,float16,float16,0,2.2650896072387696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,1,128,1,float16,fp8,0,0.8991168022155762
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,1,128,1,float16,float16,0,1.0113311767578126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,1,128,1,fp8,fp8,0,0.8813344001770019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,2,128,1,float16,float16,0,1.0202143669128418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,2,128,1,fp8,fp8,0,0.8825648307800293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,4,128,1,float16,fp8,0,0.8976943969726563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,12,128,1,float16,float16,0,0.5824063777923584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,4,128,1,fp8,fp8,0,0.9985983848571778
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,12,128,1,float16,fp8,0,0.49552001953125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,12,128,1,fp8,fp8,0,0.4820831775665283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,1,128,1,float16,float16,0,0.5137152194976806
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,1,128,1,float16,fp8,0,0.44658398628234863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,1,128,1,fp8,fp8,0,0.47817440032958985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,2,128,1,float16,fp8,0,0.4465888023376465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,2,128,1,fp8,fp8,0,0.4528319835662842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,4,128,1,float16,float16,0,0.5175087928771973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,4,128,1,float16,fp8,0,0.4637807846069336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,4,128,1,fp8,fp8,0,0.4500144004821777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,12,128,1,fp8,fp8,0,1.0634559631347655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,2,128,1,float16,fp8,0,0.9668224334716797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,12,4,128,1,float16,float16,0,1.1351887702941894
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,12,2,128,1,float16,float16,0,0.5386544227600097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,12,1,128,1,float16,fp8,0,4.609846496582032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,12,1,128,1,fp8,fp8,0,4.572153472900391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,12,1,128,1,float16,float16,0,5.868803024291992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,12,2,128,1,float16,float16,0,5.586032104492188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,12,2,128,1,float16,fp8,0,4.562174224853516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,12,2,128,1,fp8,fp8,0,4.610859298706055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,12,128,1,float16,float16,0,3.038540840148926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,12,4,128,1,float16,fp8,0,4.649115371704101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,12,128,1,float16,fp8,0,2.5489871978759764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,12,4,128,1,float16,float16,0,5.845428848266602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,12,128,1,fp8,fp8,0,2.582035255432129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,1,128,1,float16,float16,0,2.7516559600830077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,1,128,1,float16,fp8,0,2.296342468261719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,12,4,128,1,fp8,fp8,0,4.700384140014648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,1,128,1,fp8,fp8,0,2.5935247421264647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,2,128,1,float16,fp8,0,2.4039327621459963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,2,128,1,float16,float16,0,2.6637247085571287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,2,128,1,fp8,fp8,0,2.386508750915527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,4,128,1,fp8,fp8,0,2.324073600769043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,4,128,1,float16,float16,0,2.785406494140625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,12,4,128,1,float16,fp8,0,2.5243503570556642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,12,128,1,float16,fp8,0,1.2137167930603028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,12,128,1,fp8,fp8,0,1.233839988708496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,1,128,1,float16,fp8,0,1.1656975746154785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,1,128,1,float16,float16,0,1.4428848266601562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,1,128,1,fp8,fp8,0,1.1670448303222656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,2,128,1,float16,fp8,0,1.1494447708129882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,2,128,1,fp8,fp8,0,1.144647979736328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,4,128,1,float16,fp8,0,1.1630240440368653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,4,128,1,float16,float16,0,1.3785951614379883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,4,128,1,fp8,fp8,0,1.2774671554565429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,12,128,1,float16,fp8,0,0.603876781463623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,12,128,1,fp8,fp8,0,0.6114096164703369
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,1,128,1,float16,float16,0,0.724894380569458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,12,128,1,float16,float16,0,1.4085344314575194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,1,128,1,float16,fp8,0,0.6078720092773438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,1,128,1,fp8,fp8,0,0.5743648052215576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,2,128,1,float16,float16,0,0.6692975997924805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,2,128,1,float16,fp8,0,0.6315120220184326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,2,128,1,fp8,fp8,0,0.5754767894744873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,4,128,1,float16,float16,0,0.6798336029052734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,4,128,1,float16,fp8,0,0.5826735973358155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,4,128,1,fp8,fp8,0,0.5773263931274414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,12,128,1,float16,fp8,0,0.30588479042053224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,12,128,1,fp8,fp8,0,0.30880959033966066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,1,128,1,float16,float16,0,0.3344480037689209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,1,128,1,float16,fp8,0,0.3012991905212402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,1,128,1,fp8,fp8,0,0.2911184072494507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,2,128,1,float16,float16,0,0.3399104118347168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,2,128,1,float16,fp8,0,0.2928992033004761
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,2,128,1,fp8,fp8,0,0.2980448007583618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,4,128,1,float16,float16,0,0.3399168014526367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,4,128,1,float16,fp8,0,0.29815359115600587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,12,2,128,1,float16,float16,0,1.425374412536621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,4,128,1,fp8,fp8,0,0.2967216014862061
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,12,12,128,1,float16,float16,0,0.7115695953369141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,12,1,128,1,float16,fp8,0,2.5965503692626952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,12,1,128,1,fp8,fp8,0,2.632281684875488
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,12,12,128,1,float16,float16,0,0.37298879623413084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,12,2,128,1,float16,fp8,0,2.649201583862305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,12,1,128,1,float16,float16,0,3.182569694519043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,12,2,128,1,fp8,fp8,0,2.6436912536621096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,12,2,128,1,float16,float16,0,3.211307144165039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,12,128,1,fp8,fp8,0,1.4284192085266114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,1,128,1,float16,float16,0,1.6365039825439454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,1,128,1,float16,fp8,0,1.3093647956848145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,12,4,128,1,fp8,fp8,0,2.671463966369629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,12,4,128,1,float16,fp8,0,2.706889533996582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,12,4,128,1,float16,float16,0,3.329723358154297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,1,128,1,fp8,fp8,0,1.326249599456787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,2,128,1,float16,float16,0,1.6215007781982422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,2,128,1,fp8,fp8,0,1.3758383750915528
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,4,128,1,float16,float16,0,1.615996742248535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,4,128,1,fp8,fp8,0,1.3414608001708985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,4,128,1,float16,fp8,0,1.5091440200805664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,12,128,1,float16,fp8,0,0.7313712120056153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,12,128,1,fp8,fp8,0,0.8141119956970215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,12,128,1,float16,fp8,0,1.5459872245788575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,1,128,1,float16,fp8,0,0.6877344131469727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,1,128,1,float16,float16,0,0.7605279922485352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,2,128,1,float16,float16,0,0.8040335655212403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,2,128,1,float16,fp8,0,0.6593535900115967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,2,128,1,fp8,fp8,0,0.6769360065460205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,4,128,1,float16,float16,0,0.8101167678833008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,12,128,1,float16,float16,0,0.4399375915527344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,4,128,1,float16,fp8,0,0.7085440158843994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,12,128,1,float16,fp8,0,0.39875040054321287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,4,128,1,fp8,fp8,0,0.6712240219116211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,12,128,1,fp8,fp8,0,0.3615135908126831
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,2,128,1,float16,fp8,0,1.3177663803100585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,1,128,1,fp8,fp8,0,0.3350879907608032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,1,128,1,float16,fp8,0,0.35956640243530275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,2,128,1,float16,float16,0,0.387940788269043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,2,128,1,float16,fp8,0,0.3383615970611572
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,4,128,1,float16,float16,0,0.40604000091552733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,4,128,1,float16,fp8,0,0.3423536062240601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,4,128,1,fp8,fp8,0,0.34281439781188966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,12,128,1,float16,float16,0,0.9196000099182129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,12,128,1,float16,fp8,0,0.18628480434417724
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,12,128,1,fp8,fp8,0,0.18662879467010499
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,1,128,1,float16,float16,0,0.20073599815368653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,1,128,1,float16,fp8,0,0.17655999660491944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,1,128,1,fp8,fp8,0,0.17618399858474731
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,12,12,128,1,float16,float16,0,1.7197343826293945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,2,128,1,float16,float16,0,0.20299038887023926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,2,128,1,float16,fp8,0,0.176529598236084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,2,128,1,fp8,fp8,0,0.17689759731292726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,4,128,1,float16,float16,0,0.20555999279022216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,4,128,1,float16,fp8,0,0.1781440019607544
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,4,128,1,fp8,fp8,0,0.17796159982681276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,1,128,1,float16,float16,0,0.39474239349365237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,12,2,128,1,fp8,fp8,0,0.33749918937683104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,12,1,128,1,float16,fp8,0,2.439675140380859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,12,1,128,1,fp8,fp8,0,2.44498233795166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,12,1,128,1,float16,float16,0,2.854419136047363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,12,2,128,1,float16,float16,0,3.0016752243041993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,12,1,128,1,fp8,fp8,0,0.6724688053131104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,12,2,128,1,float16,fp8,0,2.465763282775879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,12,2,128,1,fp8,fp8,0,2.4635215759277345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,12,128,1,float16,fp8,0,1.3419856071472167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,12,128,1,float16,float16,0,1.6972879409790038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,12,4,128,1,float16,fp8,0,2.525987243652344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,12,4,128,1,float16,float16,0,3.027204895019531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,12,4,128,1,fp8,fp8,0,2.6222368240356446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,12,128,1,fp8,fp8,0,1.3585951805114747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,12,12,128,1,float16,float16,0,0.24121758937835694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,1,128,1,float16,fp8,0,1.2266688346862793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,1,128,1,float16,float16,0,1.4420319557189942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,1,128,1,fp8,fp8,0,1.3640080451965333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,2,128,1,float16,float16,0,1.4257023811340332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,2,128,1,fp8,fp8,0,1.2484543800354004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,2,128,1,float16,fp8,0,1.3656576156616211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,4,128,1,float16,float16,0,1.4981391906738282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,12,128,1,float16,float16,0,0.808016014099121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,12,128,1,float16,fp8,0,0.7290160179138183
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,4,128,1,float16,fp8,0,1.265732765197754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,12,128,1,fp8,fp8,0,0.6775936126708985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,1,128,1,float16,float16,0,0.716158390045166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,1,128,1,float16,fp8,0,0.6686751842498779
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,1,128,1,fp8,fp8,0,0.6304719924926758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,2,128,1,float16,fp8,0,0.6282144069671631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,2,128,1,fp8,fp8,0,0.6246255874633789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,4,128,1,float16,float16,0,0.7534272193908691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,4,128,1,float16,fp8,0,0.6597968101501465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,4,128,1,fp8,fp8,0,0.6410927772521973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,12,128,1,float16,fp8,0,0.34577279090881347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,12,128,1,fp8,fp8,0,0.3704576015472412
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,1,128,1,float16,fp8,0,0.3170223951339722
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,2,128,1,float16,float16,0,0.37239999771118165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,2,128,1,float16,fp8,0,0.31967360973358155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,2,128,1,fp8,fp8,0,0.3318943977355957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,4,128,1,float16,float16,0,0.37126879692077636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,4,128,1,float16,fp8,0,0.32993600368499754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,4,128,1,fp8,fp8,0,0.3246959924697876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,12,128,1,float16,float16,0,0.2100048065185547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,12,128,1,float16,fp8,0,0.17904319763183593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,12,128,1,fp8,fp8,0,0.17983520030975342
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,1,128,1,float16,float16,0,0.18466559648513795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,12,4,128,1,fp8,fp8,0,1.3484064102172852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,1,128,1,float16,fp8,0,0.1650447964668274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,1,128,1,fp8,fp8,0,0.1624559998512268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,2,128,1,float16,float16,0,0.18732320070266723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,2,128,1,float16,fp8,0,0.164137601852417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,2,128,1,fp8,fp8,0,0.16379679441452027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,12,2,128,1,float16,float16,0,0.7238639831542969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,4,128,1,float16,fp8,0,0.1668496012687683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,4,128,1,float16,float16,0,0.19130239486694336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,12,128,1,float16,fp8,0,0.10965280532836914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,12,128,1,float16,float16,0,0.1303439974784851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,12,128,1,fp8,fp8,0,0.1079103946685791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,1,128,1,float16,float16,0,0.10078719854354859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,1,128,1,float16,fp8,0,0.09054880142211914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,2,128,1,float16,float16,0,0.10096319913864135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,2,128,1,float16,fp8,0,0.09035040140151977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,2,128,1,fp8,fp8,0,0.09023839831352234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,1,128,1,float16,float16,0,0.3629071950912476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,4,128,1,float16,float16,0,0.10452799797058106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,4,128,1,float16,fp8,0,0.09088960289955139
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,4,128,1,fp8,fp8,0,0.09025920033454896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,12,1,128,1,float16,fp8,0,1.459564781188965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,12,1,128,1,fp8,fp8,0,1.4442784309387207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,12,1,128,1,float16,float16,0,1.679052734375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,12,4,128,1,fp8,fp8,0,0.1666751980781555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,12,128,1,float16,float16,0,0.4065855979919434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,12,1,128,1,fp8,fp8,0,0.09007520079612732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,12,2,128,1,float16,float16,0,1.7253631591796874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,12,2,128,1,fp8,fp8,0,1.5776512145996093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,12,1,128,1,fp8,fp8,0,0.3172368049621582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,12,4,128,1,float16,float16,0,1.7520879745483398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,12,128,1,float16,float16,0,0.9839119911193848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,1,128,1,float16,float16,0,0.8223695755004883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,12,4,128,1,float16,fp8,0,1.5005871772766113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,12,4,128,1,fp8,fp8,0,1.487326431274414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,1,128,1,float16,fp8,0,0.8113007545471191
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,1,128,1,fp8,fp8,0,0.7503680229187012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,2,128,1,float16,float16,0,0.8354175567626954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,2,128,1,float16,fp8,0,0.739961576461792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,2,128,1,fp8,fp8,0,0.7432784080505371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,4,128,1,float16,float16,0,0.8798064231872559
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,12,128,1,float16,float16,0,0.4961855888366699
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,12,128,1,float16,fp8,0,0.42917280197143554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,4,128,1,float16,fp8,0,0.7614719867706299
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,4,128,1,fp8,fp8,0,0.7717840194702148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,12,128,1,fp8,fp8,0,0.4398767948150635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,1,128,1,float16,fp8,0,0.3733504056930542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,1,128,1,float16,float16,0,0.42081117630004883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,1,128,1,fp8,fp8,0,0.3743808031082153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,12,2,128,1,float16,fp8,0,1.4687104225158691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,2,128,1,float16,fp8,0,0.38725121021270753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,2,128,1,float16,float16,0,0.4297232151031494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,2,128,1,fp8,fp8,0,0.3770384073257446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,4,128,1,float16,float16,0,0.435481595993042
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,12,128,1,float16,fp8,0,0.22794079780578613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,12,128,1,float16,float16,0,0.2682352066040039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,4,128,1,fp8,fp8,0,0.38439359664916994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,12,128,1,fp8,fp8,0,0.22761280536651612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,1,128,1,float16,float16,0,0.21760001182556152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,1,128,1,float16,fp8,0,0.19291679859161376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,1,128,1,fp8,fp8,0,0.19320000410079957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,2,128,1,float16,fp8,0,0.19427679777145385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,2,128,1,fp8,fp8,0,0.19380639791488646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,4,128,1,float16,float16,0,0.22545440196990968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,4,128,1,float16,fp8,0,0.1993232011795044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,4,128,1,fp8,fp8,0,0.19897279739379883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,12,128,1,float16,fp8,0,0.12630560398101806
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,12,128,1,fp8,fp8,0,0.12541919946670532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,1,128,1,float16,float16,0,0.11471999883651733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,1,128,1,float16,fp8,0,0.10227359533309936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,1,128,1,fp8,fp8,0,0.10152000188827515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,2,128,1,float16,float16,0,0.11629120111465455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,2,128,1,float16,fp8,0,0.10264320373535156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,2,128,1,fp8,fp8,0,0.10301439762115479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,4,128,1,float16,float16,0,0.1203376054763794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,4,128,1,fp8,fp8,0,0.10341759920120239
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,12,128,1,float16,float16,0,0.08707039952278137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,12,128,1,float16,fp8,0,0.07369279861450195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,12,128,1,fp8,fp8,0,0.07337279915809632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,1,128,1,float16,float16,0,0.06288800239562989
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,1,128,1,float16,fp8,0,0.057492798566818236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,1,128,1,fp8,fp8,0,0.057387202978134155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,2,128,1,float16,float16,0,0.06320800185203553
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,2,128,1,float16,fp8,0,0.057436800003051756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,12,4,128,1,float16,fp8,0,0.38475039005279543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,2,128,1,fp8,fp8,0,0.05753440260887146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,4,128,1,float16,float16,0,0.06424319744110107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,4,128,1,float16,fp8,0,0.058766400814056395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,12,4,128,1,fp8,fp8,0,0.05885120034217835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,12,2,128,1,float16,float16,0,0.2212127923965454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,12,128,1,fp8,fp8,0,0.8502847671508789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,12,128,1,float16,float16,0,0.13933759927749634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,12,4,128,1,float16,fp8,0,0.10433759689331054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,12,1,128,1,float16,fp8,0,1.4283472061157227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,12,1,128,1,float16,float16,0,1.6083152770996094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,12,1,128,1,fp8,fp8,0,1.4311552047729492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,12,2,128,1,float16,fp8,0,1.4475343704223633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,12,2,128,1,float16,float16,0,1.645921516418457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,12,2,128,1,fp8,fp8,0,1.4482704162597657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,12,128,1,float16,float16,0,0.9958767890930176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,12,4,128,1,float16,fp8,0,1.4858688354492187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,12,4,128,1,fp8,fp8,0,1.4871376037597657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,12,4,128,1,float16,float16,0,1.7448287963867188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,12,12,128,1,float16,fp8,0,0.9566255569458008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,12,128,1,fp8,fp8,0,0.841977596282959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,1,128,1,float16,fp8,0,0.7290688037872315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,1,128,1,float16,float16,0,0.8156448364257812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,1,128,1,fp8,fp8,0,0.7206431865692139
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,2,128,1,float16,float16,0,0.874407958984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,2,128,1,float16,fp8,0,0.7230624198913574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,2,128,1,fp8,fp8,0,0.7304175853729248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,12,128,1,float16,float16,0,0.5165823936462403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,4,128,1,float16,float16,0,0.8532943725585938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,4,128,1,float16,fp8,0,0.7434447765350342
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,12,128,1,float16,fp8,0,0.4681215763092041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,12,128,1,fp8,fp8,0,0.4287168025970459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,1,128,1,float16,float16,0,0.4100895881652832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,1,128,1,float16,fp8,0,0.36480638980865476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,1,128,1,fp8,fp8,0,0.3650448083877563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,2,128,1,float16,float16,0,0.4138031959533691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,2,128,1,float16,fp8,0,0.38793919086456297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,4,128,1,float16,fp8,0,0.38004159927368164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,12,128,1,float16,float16,0,0.26972959041595457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,4,128,1,fp8,fp8,0,0.3885567903518677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,1,128,1,float16,float16,0,0.20878241062164307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,1,128,1,float16,fp8,0,0.18886560201644897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,1,128,1,fp8,fp8,0,0.1856544017791748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,2,128,1,float16,float16,0,0.2131216049194336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,12,128,1,float16,fp8,0,0.9087648391723633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,2,128,1,fp8,fp8,0,0.19034080505371093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,4,128,1,float16,float16,0,0.22076640129089356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,4,128,1,float16,fp8,0,0.19559359550476074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,4,128,1,fp8,fp8,0,0.19516639709472655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,12,128,1,float16,float16,0,0.14839520454406738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,12,128,1,float16,fp8,0,0.12631200551986693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,12,128,1,fp8,fp8,0,0.12598079442977905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,1,128,1,float16,float16,0,0.11101599931716918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,1,128,1,float16,fp8,0,0.09811840057373047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,1,128,1,fp8,fp8,0,0.09874879717826843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,2,128,1,float16,float16,0,0.1129647970199585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,2,128,1,float16,fp8,0,0.09957600235939026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,2,128,1,fp8,fp8,0,0.0997376024723053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,4,128,1,float16,float16,0,0.11676479578018188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,12,4,128,1,fp8,fp8,0,0.781550407409668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,4,128,1,fp8,fp8,0,0.10314559936523438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,12,128,1,float16,float16,0,0.08228160142898559
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,12,128,1,float16,fp8,0,0.07135040163993836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,12,128,1,fp8,fp8,0,0.07145919799804687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,1,128,1,float16,float16,0,0.059894400835037234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,1,128,1,float16,fp8,0,0.05441280007362366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,1,128,1,fp8,fp8,0,0.055060797929763795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,2,128,1,float16,float16,0,0.05983039736747742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,4,128,1,float16,float16,0,0.4318431854248047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,12,128,1,float16,fp8,0,0.2366960048675537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,2,128,1,fp8,fp8,0,0.055144000053405764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,4,128,1,float16,fp8,0,0.05491200089454651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,4,128,1,fp8,fp8,0,0.055155199766159055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,12,128,1,float16,float16,0,0.04356479942798615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,12,128,1,float16,fp8,0,0.03803359866142273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,12,128,1,fp8,fp8,0,0.03805440068244934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,1,128,1,float16,float16,0,0.03619360029697418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,1,128,1,float16,fp8,0,0.03335680067539215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,1,128,1,fp8,fp8,0,0.033236798644065854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,2,128,1,float16,float16,0,0.03639039993286133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,2,128,1,float16,fp8,0,0.03333759903907776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,4,128,1,float16,float16,0,0.03673279881477356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,4,128,1,float16,fp8,0,0.03355039954185486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,4,128,1,fp8,fp8,0,0.03383519947528839
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,12,2,128,1,fp8,fp8,0,0.03354400098323822
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,12,1,128,1,float16,float16,0,0.9842304229736328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,12,1,128,1,float16,fp8,0,0.8935999870300293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,12,4,128,1,float16,fp8,0,0.10327199697494507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,12,2,128,1,fp8,fp8,0,0.3661488056182861
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,2,128,1,float16,fp8,0,0.05545439720153809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,12,1,128,1,fp8,fp8,0,0.885097599029541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,12,4,128,1,float16,float16,0,0.0626479983329773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,12,2,128,1,float16,float16,0,0.9915087699890137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,12,128,1,fp8,fp8,0,0.22809920310974122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,12,2,128,1,float16,fp8,0,0.8976335525512695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,12,2,128,1,float16,fp8,0,0.18768320083618165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,12,2,128,1,fp8,fp8,0,0.8924927711486816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,12,4,128,1,float16,float16,0,1.0378416061401368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,12,128,1,float16,float16,0,0.6405231952667236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,12,4,128,1,float16,fp8,0,0.9245488166809082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,12,4,128,1,fp8,fp8,0,0.927244758605957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,12,128,1,fp8,fp8,0,0.5291711807250976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,1,128,1,float16,fp8,0,0.4730031967163086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,2,128,1,float16,fp8,0,0.4525152206420898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,2,128,1,fp8,fp8,0,0.4519536018371582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,4,128,1,float16,float16,0,0.5408544063568115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,4,128,1,float16,fp8,0,0.46787681579589846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,4,128,1,fp8,fp8,0,0.46749439239501955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,12,128,1,float16,float16,0,0.3308720111846924
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,12,128,1,float16,fp8,0,0.270795202255249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,12,128,1,fp8,fp8,0,0.27024478912353517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,1,128,1,float16,float16,0,0.2567087888717651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,1,128,1,fp8,fp8,0,0.22916159629821778
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,2,128,1,float16,fp8,0,0.23342080116271974
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,2,128,1,fp8,fp8,0,0.23260159492492677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,4,128,1,float16,float16,0,0.27078559398651125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,4,128,1,float16,fp8,0,0.2409440040588379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,4,128,1,fp8,fp8,0,0.24183199405670167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,12,128,1,float16,float16,0,0.17396960258483887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,12,128,1,float16,fp8,0,0.1419935941696167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,12,128,1,float16,fp8,0,0.5321072101593017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,12,128,1,fp8,fp8,0,0.14183520078659057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,1,128,1,float16,float16,0,0.13332799673080445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,1,128,1,float16,float16,0,0.49170398712158203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,1,128,1,fp8,fp8,0,0.12061120271682739
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,1,128,1,fp8,fp8,0,0.44349279403686526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,2,128,1,float16,float16,0,0.13570400476455688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,2,128,1,fp8,fp8,0,0.12216000556945801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,2,128,1,float16,fp8,0,0.1218000054359436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,12,2,128,1,float16,float16,0,0.5006224155426026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,4,128,1,float16,float16,0,0.14160000085830687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,4,128,1,float16,fp8,0,0.12679040431976318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,12,128,1,float16,float16,0,0.09475679993629456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,4,128,1,fp8,fp8,0,0.12605600357055663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,12,128,1,float16,fp8,0,0.07668640017509461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,1,128,1,float16,float16,0,0.07119839787483215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,1,128,1,float16,fp8,0,0.06416320204734802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,1,128,1,fp8,fp8,0,0.06394879817962647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,2,128,1,float16,float16,0,0.07351359724998474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,2,128,1,fp8,fp8,0,0.0649936020374298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,4,128,1,float16,fp8,0,0.06654400229454041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,4,128,1,fp8,fp8,0,0.06687359809875489
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,12,128,1,float16,float16,0,0.05808479785919189
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,12,128,1,float16,fp8,0,0.04297440052032471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,12,128,1,fp8,fp8,0,0.04153920114040375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,1,128,1,float16,float16,0,0.040449601411819455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,1,128,1,float16,fp8,0,0.037915199995040894
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,2,128,1,float16,float16,0,0.041105601191520694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,1,128,1,float16,fp8,0,0.22922399044036865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,2,128,1,float16,fp8,0,0.03808799982070923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,2,128,1,fp8,fp8,0,0.03781279921531677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,12,2,128,1,float16,float16,0,0.2577631950378418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,4,128,1,float16,fp8,0,0.03814719915390015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,12,128,1,float16,float16,0,0.032041600346565245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,12,128,1,float16,fp8,0,0.028806400299072266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,12,128,1,fp8,fp8,0,0.028996801376342772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,1,128,1,float16,float16,0,0.029492801427841185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,1,128,1,float16,fp8,0,0.027932798862457274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,1,128,1,fp8,fp8,0,0.027697598934173583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,2,128,1,float16,float16,0,0.029886400699615477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,2,128,1,float16,fp8,0,0.028004801273345946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,2,128,1,fp8,fp8,0,0.02768160104751587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,4,128,1,float16,float16,0,0.03033599853515625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,4,128,1,float16,fp8,0,0.02807359993457794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,12,1,128,1,float16,fp8,0,0.121288001537323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,12,4,128,1,fp8,fp8,0,0.027980801463127137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,12,128,1,fp8,fp8,0,0.0762112021446228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,2,128,1,float16,fp8,0,0.06495839953422547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,12,4,128,1,float16,float16,0,0.07698079943656921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,1,128,1,fp8,fp8,0,0.03823840022087097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,12,1,128,1,float16,float16,0,1.1487343788146973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,12,1,128,1,float16,fp8,0,1.0780320167541504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,4,128,1,float16,float16,0,0.041817599534988405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,12,4,128,1,fp8,fp8,0,0.03792960047721863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,12,1,128,1,fp8,fp8,0,1.0740384101867675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,12,2,128,1,float16,float16,0,1.173465633392334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,12,2,128,1,float16,fp8,0,1.0918911933898925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,12,4,128,1,float16,float16,0,1.2302816390991211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,12,128,1,float16,fp8,0,0.6551424026489258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,12,128,1,float16,float16,0,0.7595248222351074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,12,2,128,1,fp8,fp8,0,1.0905280113220215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,12,4,128,1,fp8,fp8,0,1.1314991950988769
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,1,128,1,float16,float16,0,0.5815167903900147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,12,128,1,fp8,fp8,0,0.6663680076599121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,1,128,1,float16,fp8,0,0.5558127880096435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,1,128,1,fp8,fp8,0,0.5457295894622802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,2,128,1,float16,float16,0,0.5926447868347168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,2,128,1,float16,fp8,0,0.5538671970367431
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,4,128,1,float16,fp8,0,0.5806704044342041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,2,128,1,fp8,fp8,0,0.5527967929840087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,4,128,1,fp8,fp8,0,0.5710447788238525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,12,128,1,fp8,fp8,0,0.3350287914276123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,12,128,1,float16,fp8,0,0.3358799934387207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,1,128,1,float16,float16,0,0.2989151954650879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,1,128,1,float16,fp8,0,0.2789376020431519
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,1,128,1,fp8,fp8,0,0.279803204536438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,2,128,1,float16,float16,0,0.30505759716033937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,2,128,1,float16,fp8,0,0.28218080997467043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,2,128,1,fp8,fp8,0,0.28124639987945554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,4,128,1,float16,float16,0,0.31963040828704836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,4,128,1,float16,fp8,0,0.2941296100616455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,4,128,1,fp8,fp8,0,0.2932976007461548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,12,128,1,float16,float16,0,0.20058720111846923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,12,128,1,fp8,fp8,0,0.17448480129241944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,1,128,1,float16,float16,0,0.15595840215682982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,1,128,1,float16,fp8,0,0.14512319564819337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,2,128,1,float16,float16,0,0.15928319692611695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,2,128,1,float16,fp8,0,0.14667680263519287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,2,128,1,fp8,fp8,0,0.1471184015274048
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,4,128,1,float16,float16,0,0.16690080165863036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,12,4,128,1,float16,float16,0,0.6225039958953857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,4,128,1,fp8,fp8,0,0.15248639583587648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,12,128,1,float16,float16,0,0.10690560340881347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,12,128,1,float16,fp8,0,0.09330080151557922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,12,12,128,1,float16,float16,0,0.38685920238494875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,12,128,1,fp8,fp8,0,0.09387840032577514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,1,128,1,float16,float16,0,0.08306879997253418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,1,128,1,fp8,fp8,0,0.07727680206298829
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,1,128,1,float16,fp8,0,0.07728959918022156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,2,128,1,float16,fp8,0,0.0793071985244751
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,2,128,1,fp8,fp8,0,0.07838879823684693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,4,128,1,float16,fp8,0,0.08275200128555298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,4,128,1,float16,float16,0,0.09025120139122009
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,12,128,1,float16,float16,0,0.05938400030136108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,12,128,1,float16,fp8,0,0.051932799816131595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,12,128,1,fp8,fp8,0,0.05204160213470459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,1,128,1,float16,float16,0,0.044556799530982974
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,1,128,1,float16,fp8,0,0.0428384006023407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,12,4,128,1,float16,fp8,0,1.1340847969055177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,2,128,1,float16,float16,0,0.04510239958763122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,2,128,1,fp8,fp8,0,0.04376319944858551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,12,128,1,float16,fp8,0,0.1734287977218628
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,2,128,1,float16,fp8,0,0.043663999438285826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,4,128,1,float16,float16,0,0.0478879988193512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,12,128,1,float16,float16,0,0.031092798709869383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,4,128,1,float16,fp8,0,0.04381439983844757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,12,128,1,float16,fp8,0,0.028916800022125246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,12,128,1,fp8,fp8,0,0.02858879864215851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,1,128,1,float16,float16,0,0.02614719867706299
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,1,128,1,float16,fp8,0,0.025204798579216002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,1,128,1,fp8,fp8,0,0.025574401021003723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,2,128,1,float16,float16,0,0.026235198974609374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,2,128,1,float16,fp8,0,0.026822400093078614
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,2,128,1,fp8,fp8,0,0.026531198620796205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,4,128,1,float16,float16,0,0.02707360088825226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,4,128,1,float16,fp8,0,0.15352799892425537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,4,128,1,float16,fp8,0,0.02709279954433441
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,12,4,128,1,fp8,fp8,0,0.027348798513412476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,12,128,1,float16,fp8,0,0.03449440002441406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,12,128,1,fp8,fp8,0,0.034467199444770814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,1,128,1,float16,float16,0,0.028734400868415833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,1,128,1,float16,fp8,0,0.029681599140167235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,1,128,1,fp8,fp8,0,0.029252800345420837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,2,128,1,float16,float16,0,0.03115839958190918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,2,128,1,float16,fp8,0,0.03054560124874115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,2,128,1,fp8,fp8,0,0.030847999453544616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,4,128,1,float16,float16,0,0.03250559866428375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,4,128,1,float16,fp8,0,0.032348799705505374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,2,128,1,float16,float16,0,0.08480479717254638
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,12,4,128,1,fp8,fp8,0,0.08230080008506775
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,12,1,128,1,float16,float16,0,0.7228352069854737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,12,1,128,1,float16,fp8,0,0.6923535823822021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,1,128,1,fp8,fp8,0,0.042630401253700254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,12,1,128,1,fp8,fp8,0,0.6962384223937989
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,12,2,128,1,float16,float16,0,0.7457600116729737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,12,4,128,1,fp8,fp8,0,0.04387679994106293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,12,1,128,1,fp8,fp8,0,0.146070396900177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,12,2,128,1,float16,fp8,0,0.7042960166931153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,12,2,128,1,fp8,fp8,0,0.7104656219482421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,12,4,128,1,float16,float16,0,0.8045599937438965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,12,4,128,1,float16,fp8,0,0.755622386932373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,12,128,1,float16,fp8,0,0.470363187789917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,12,128,1,float16,float16,0,0.034176000952720643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,12,128,1,float16,float16,0,0.5453855991363525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,12,4,128,1,fp8,fp8,0,0.7518144130706788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,1,128,1,float16,float16,0,0.36639840602874757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,1,128,1,fp8,fp8,0,0.35302081108093264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,2,128,1,float16,fp8,0,0.36199839115142823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,12,4,128,1,fp8,fp8,0,0.03206880092620849
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,4,128,1,float16,float16,0,0.4089216232299805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,4,128,1,float16,fp8,0,0.3804768085479736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,4,128,1,fp8,fp8,0,0.3832815885543823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,12,128,1,float16,float16,0,0.28066239356994627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,12,128,1,float16,fp8,0,0.24331998825073242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,1,128,1,float16,float16,0,0.19082720279693605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,1,128,1,float16,fp8,0,0.18467359542846679
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,12,128,1,fp8,fp8,0,0.24256160259246826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,1,128,1,fp8,fp8,0,0.1845199942588806
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,2,128,1,float16,float16,0,0.1962015986442566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,2,128,1,float16,fp8,0,0.18724960088729858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,2,128,1,fp8,fp8,0,0.18781599998474122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,4,128,1,float16,float16,0,0.21188640594482422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,4,128,1,float16,fp8,0,0.1978943943977356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,12,128,1,float16,float16,0,0.14753279685974122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,12,128,1,float16,fp8,0,0.12763839960098267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,12,128,1,fp8,fp8,0,0.12805440425872802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,1,128,1,float16,float16,0,0.10168479681015015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,1,128,1,float16,fp8,0,0.09848319888114929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,1,128,1,fp8,fp8,0,0.09810879826545715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,2,128,1,float16,float16,0,0.10512959957122803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,2,128,1,float16,fp8,0,0.10000159740447997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,2,128,1,fp8,fp8,0,0.09989920258522034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,4,128,1,float16,float16,0,0.11257439851760864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,12,128,1,fp8,fp8,0,0.47373762130737307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,4,128,1,float16,fp8,0,0.10535039901733398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,12,128,1,float16,float16,0,0.08086559772491456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,12,128,1,float16,fp8,0,0.0706287980079651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,1,128,1,float16,fp8,0,0.3577552080154419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,1,128,1,float16,float16,0,0.0575007975101471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,1,128,1,float16,fp8,0,0.054054397344589236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,2,128,1,fp8,fp8,0,0.36115360260009766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,1,128,1,fp8,fp8,0,0.05347999930381775
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,2,128,1,float16,float16,0,0.05933600068092346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,2,128,1,float16,fp8,0,0.05579040050506592
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,4,128,1,float16,float16,0,0.06354719996452332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,4,128,1,float16,fp8,0,0.059620797634124756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,4,128,1,fp8,fp8,0,0.05925599932670593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,12,128,1,float16,float16,0,0.04663839936256409
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,12,128,1,float16,fp8,0,0.03945919871330261
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,12,128,1,fp8,fp8,0,0.03984000086784363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,1,128,1,float16,float16,0,0.03153760135173798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,1,128,1,float16,fp8,0,0.03265439867973328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,1,128,1,fp8,fp8,0,0.03224000036716461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,2,128,1,float16,float16,0,0.03198719918727875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,2,128,1,float16,fp8,0,0.032548800110816956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,2,128,1,fp8,fp8,0,0.032681599259376526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,4,128,1,float16,fp8,0,0.03331040143966675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,4,128,1,fp8,fp8,0,0.033155199885368344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,12,4,128,1,fp8,fp8,0,0.1987120032310486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,12,128,1,float16,float16,0,0.026204800605773924
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,12,128,1,float16,fp8,0,0.02475520074367523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,12,128,1,fp8,fp8,0,0.024766400456428528
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,1,128,1,float16,float16,0,0.02179519981145859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,1,128,1,fp8,fp8,0,0.022694399952888487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,2,128,1,float16,float16,0,0.02221920043230057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,2,128,1,float16,fp8,0,0.022443200647830962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,2,128,1,fp8,fp8,0,0.022599999606609345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,4,128,1,float16,float16,0,0.022705599665641785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,4,128,1,float16,fp8,0,0.02324959933757782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,4,128,1,fp8,fp8,0,0.023347200453281404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,12,4,128,1,fp8,fp8,0,0.10546879768371582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,12,128,1,fp8,fp8,0,0.07089279890060425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,12,128,1,fp8,fp8,0,0.01844639927148819
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,1,128,1,float16,float16,0,0.01687999963760376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,12,2,128,1,fp8,fp8,0,0.055315202474594115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,1,128,1,fp8,fp8,0,0.017260800302028655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,2,128,1,float16,float16,0,0.01714400053024292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,2,128,1,float16,fp8,0,0.017323200404644013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,2,128,1,fp8,fp8,0,0.017497600615024568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,4,128,1,float16,float16,0,0.017246399819850922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,4,128,1,float16,fp8,0,0.0173567995429039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,4,128,1,fp8,fp8,0,0.017523199319839478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,12,4,128,1,float16,float16,0,0.03543359935283661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,12,128,1,float16,fp8,0,0.018007999658584593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,12,128,1,fp8,fp8,0,0.01804639995098114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,1,128,1,float16,float16,0,0.01677280068397522
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,1,128,1,float16,fp8,0,0.016979199647903443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,12,1,128,1,float16,fp8,0,0.022588799893856048
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,1,128,1,fp8,fp8,0,0.016991999745368958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,2,128,1,float16,float16,0,0.016702400147914888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,2,128,1,fp8,fp8,0,0.017113600671291352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,4,128,1,float16,fp8,0,0.01687840074300766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,4,128,1,fp8,fp8,0,0.01687040030956268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,12,128,1,float16,float16,0,0.018798400461673737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,12,128,1,float16,fp8,0,0.01839679926633835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,12,1,128,1,float16,float16,0,0.29907679557800293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,12,1,128,1,float16,fp8,0,0.30393280982971194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,12,1,128,1,float16,fp8,0,0.0176256000995636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,12,1,128,1,fp8,fp8,0,0.30090720653533937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,12,2,128,1,float16,fp8,0,0.30991039276123045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,12,2,128,1,float16,float16,0,0.31094560623168943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,12,2,128,1,fp8,fp8,0,0.3090575933456421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,12,4,128,1,float16,float16,0,0.3407984018325806
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,12,128,1,float16,float16,0,0.24780640602111817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,12,128,1,float16,float16,0,0.017795200645923614
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,12,4,128,1,fp8,fp8,0,0.3305808067321777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,12,128,1,float16,fp8,0,0.2077023983001709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,1,128,1,float16,float16,0,0.15667999982833863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,1,128,1,fp8,fp8,0,0.15723520517349243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,2,128,1,float16,float16,0,0.16236319541931152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,2,128,1,float16,fp8,0,0.1606927990913391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,2,128,1,fp8,fp8,0,0.16068960428237916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,2,128,1,float16,fp8,0,0.017001600563526155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,12,4,128,1,float16,float16,0,0.016897599399089813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,4,128,1,float16,fp8,0,0.1713296055793762
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,4,128,1,fp8,fp8,0,0.1727344036102295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,12,128,1,float16,float16,0,0.1315376043319702
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,12,128,1,float16,fp8,0,0.11099840402603149
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,12,128,1,fp8,fp8,0,0.11038240194320678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,1,128,1,float16,float16,0,0.08553439974784852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,1,128,1,float16,fp8,0,0.08535199761390685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,12,2,128,1,float16,float16,0,0.3777087926864624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,2,128,1,float16,float16,0,0.08803520202636719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,2,128,1,fp8,fp8,0,0.08687679767608643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,2,128,1,float16,fp8,0,0.08697760105133057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,4,128,1,float16,float16,0,0.0964784026145935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,4,128,1,float16,fp8,0,0.09281119704246521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,4,128,1,fp8,fp8,0,0.09268320202827454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,12,128,1,float16,float16,0,0.07326719760894776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,12,128,1,float16,fp8,0,0.061694401502609256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,12,128,1,fp8,fp8,0,0.061406397819519044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,1,128,1,float16,float16,0,0.0489439994096756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,1,128,1,float16,fp8,0,0.046254399418830874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,1,128,1,fp8,fp8,0,0.04680159986019135
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,2,128,1,float16,float16,0,0.05029919743537903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,12,4,128,1,float16,fp8,0,0.33163840770721437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,2,128,1,fp8,fp8,0,0.04821119904518127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,4,128,1,float16,float16,0,0.054769599437713624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,4,128,1,float16,fp8,0,0.0520304024219513
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,4,128,1,fp8,fp8,0,0.051867198944091794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,1,128,1,float16,fp8,0,0.15814880132675171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,12,128,1,float16,float16,0,0.04163840115070343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,12,128,1,float16,fp8,0,0.03483839929103851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,12,128,1,fp8,fp8,0,0.03423520028591156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,1,128,1,float16,float16,0,0.026851201057434083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,1,128,1,float16,fp8,0,0.026763200759887695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,2,128,1,float16,float16,0,0.027134400606155396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,2,128,1,fp8,fp8,0,0.02707360088825226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,4,128,1,float16,float16,0,0.17797759771347046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,4,128,1,float16,float16,0,0.030000001192092896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,4,128,1,fp8,fp8,0,0.028571200370788575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,4,128,1,float16,fp8,0,0.0284527987241745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,12,128,1,float16,float16,0,0.023086400330066682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,12,128,1,float16,fp8,0,0.02130720019340515
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,12,128,1,fp8,fp8,0,0.021652799844741822
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,1,128,1,float16,float16,0,0.018513600528240203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,1,128,1,float16,fp8,0,0.019705599546432494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,1,128,1,fp8,fp8,0,0.01932799965143204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,2,128,1,float16,float16,0,0.018993599712848662
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,2,128,1,float16,fp8,0,0.01948480010032654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,2,128,1,fp8,fp8,0,0.019702400267124175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,4,128,1,float16,float16,0,0.019387200474739075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,4,128,1,float16,fp8,0,0.019993600249290467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,12,4,128,1,fp8,fp8,0,0.01995840072631836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,12,128,1,float16,float16,0,0.01574240028858185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,12,128,1,float16,fp8,0,0.015563200414180755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,12,128,1,fp8,fp8,0,0.015379199385643005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,1,128,1,float16,float16,0,0.013913600146770478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,1,128,1,float16,fp8,0,0.01438400000333786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,2,128,1,float16,float16,0,0.014169600605964661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,2,128,1,float16,fp8,0,0.01478080004453659
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,2,128,1,fp8,fp8,0,0.014627200365066529
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,4,128,1,float16,float16,0,0.014526399970054626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,4,128,1,float16,fp8,0,0.014692799746990204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,4,128,1,fp8,fp8,0,0.014497600495815277
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,12,128,1,float16,float16,0,0.014723199605941772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,12,128,1,float16,fp8,0,0.014987200498580933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,12,2,128,1,float16,fp8,0,0.04876160025596619
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,12,128,1,fp8,fp8,0,0.015142400562763215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,1,128,1,float16,float16,0,0.013582399487495423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,1,128,1,float16,fp8,0,0.0141184002161026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,1,128,1,fp8,fp8,0,0.013972799479961395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,2,128,1,float16,float16,0,0.013736000657081604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,2,128,1,float16,fp8,0,0.014206400513648987
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,2,128,1,fp8,fp8,0,0.014116799831390381
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,4,128,1,float16,float16,0,0.013865600526332855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,4,128,1,float16,fp8,0,0.014214399456977844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,12,4,128,1,fp8,fp8,0,0.014361600577831268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,12,128,1,float16,float16,0,0.014150400459766389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,12,128,1,float16,fp8,0,0.014856000244617463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,12,128,1,fp8,fp8,0,0.014694400131702423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,1,128,1,float16,float16,0,0.013471999764442444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,1,128,1,float16,fp8,0,0.013929599523544311
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,2,128,1,float16,float16,0,0.013553600013256072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,2,128,1,float16,fp8,0,0.014193600416183472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,2,128,1,fp8,fp8,0,0.014115199446678162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,4,128,1,float16,float16,0,0.013683199882507324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,4,128,1,float16,fp8,0,0.014230400323867798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,4,128,1,fp8,fp8,0,0.014097599685192109
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,12,1,128,1,float16,float16,0,0.147435200214386
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,12,1,128,1,float16,fp8,0,0.15393279790878295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,12,1,128,1,fp8,fp8,0,0.1518336057662964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,12,2,128,1,float16,float16,0,0.1544111967086792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,12,2,128,1,float16,fp8,0,0.1561568021774292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,12,1,128,1,fp8,fp8,0,0.01430400013923645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,12,2,128,1,fp8,fp8,0,0.15701279640197754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,12,4,128,1,float16,float16,0,0.16928160190582275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,12,4,128,1,float16,fp8,0,0.16768319606781007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,12,4,128,1,fp8,fp8,0,0.16832799911499025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,12,128,1,float16,float16,0,0.1305951952934265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,12,128,1,float16,fp8,0,0.1100383996963501
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,12,128,1,fp8,fp8,0,0.11066720485687256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,1,128,1,float16,fp8,0,0.0819920003414154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,1,128,1,fp8,fp8,0,0.0825984001159668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,2,128,1,float16,float16,0,0.0847760021686554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,2,128,1,float16,fp8,0,0.08372799754142761
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,12,12,128,1,fp8,fp8,0,0.20806560516357422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,2,128,1,fp8,fp8,0,0.08352640271186829
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,4,128,1,float16,float16,0,0.09311519861221314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,4,128,1,float16,fp8,0,0.08906400203704834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,2,128,1,float16,fp8,0,0.026862400770187377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,4,128,1,fp8,fp8,0,0.08879520297050476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,12,1,128,1,fp8,fp8,0,0.013876800239086152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,12,128,1,float16,fp8,0,0.05866559743881226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,12,128,1,fp8,fp8,0,0.05896160006523132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,1,128,1,float16,fp8,0,0.04262720048427582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,1,128,1,float16,float16,0,0.04590399861335755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,2,128,1,float16,float16,0,0.04806720018386841
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,2,128,1,fp8,fp8,0,0.044747200608253476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,4,128,1,float16,float16,0,0.05223039984703064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,4,128,1,float16,fp8,0,0.04751999974250794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,12,1,128,1,fp8,fp8,0,0.08575999736785889
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,4,128,1,fp8,fp8,0,0.047539201378822324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,12,128,1,float16,fp8,0,0.029399999976158143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,12,128,1,fp8,fp8,0,0.029607999324798583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,1,128,1,float16,float16,0,0.023401600122451783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,1,128,1,float16,fp8,0,0.024004800617694853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,1,128,1,fp8,fp8,0,0.023887999355793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,2,128,1,float16,float16,0,0.023686400055885314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,2,128,1,float16,fp8,0,0.024060800671577454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,12,1,128,1,float16,float16,0,0.0817903995513916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,2,128,1,fp8,fp8,0,0.02401919960975647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,4,128,1,float16,float16,0,0.02699199914932251
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,4,128,1,float16,fp8,0,0.024303999543190003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,4,128,1,fp8,fp8,0,0.024267199635505676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,12,128,1,float16,float16,0,0.021670399606227873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,12,128,1,float16,fp8,0,0.019012799859046935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,12,128,1,fp8,fp8,0,0.019244800508022308
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,1,128,1,float16,float16,0,0.017067199945449828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,1,128,1,fp8,fp8,0,0.0176144003868103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,2,128,1,float16,float16,0,0.0174127995967865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,2,128,1,float16,fp8,0,0.01775680035352707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,12,1,128,1,fp8,fp8,0,0.027267199754714967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,12,128,1,float16,float16,0,0.07245919704437256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,4,128,1,float16,float16,0,0.018174399435520173
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,4,128,1,float16,fp8,0,0.018198400735855103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,4,128,1,fp8,fp8,0,0.01801439970731735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,12,128,1,float16,float16,0,0.014395199716091156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,12,128,1,float16,fp8,0,0.013391999900341034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,12,128,1,fp8,fp8,0,0.013827200233936309
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,1,128,1,fp8,fp8,0,0.043188801407814024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,1,128,1,float16,fp8,0,0.01292639970779419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,1,128,1,fp8,fp8,0,0.013046400249004364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,2,128,1,float16,float16,0,0.012827199697494508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,2,128,1,float16,fp8,0,0.012750400602817536
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,2,128,1,fp8,fp8,0,0.012902399897575379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,4,128,1,float16,float16,0,0.013184000551700593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,4,128,1,float16,fp8,0,0.012780800461769104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,4,128,1,fp8,fp8,0,0.013100799918174744
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,12,128,1,float16,float16,0,0.013147200644016265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,12,128,1,float16,fp8,0,0.012958399951457977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,12,128,1,fp8,fp8,0,0.012948800623416901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,1,128,1,float16,float16,0,0.012191999703645706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,1,128,1,float16,fp8,0,0.012532800436019897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,12,12,128,1,float16,float16,0,0.039710399508476255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,2,128,1,float16,fp8,0,0.01273919939994812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,2,128,1,fp8,fp8,0,0.012558400630950928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,4,128,1,float16,float16,0,0.012256000190973282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,4,128,1,float16,fp8,0,0.012681600451469422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,4,128,1,fp8,fp8,0,0.012558400630950928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,12,128,1,float16,float16,0,0.012740799784660339
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,12,128,1,float16,fp8,0,0.013064000010490417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,12,128,1,fp8,fp8,0,0.012838399410247803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,1,128,1,float16,float16,0,0.012115199863910676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,1,128,1,float16,fp8,0,0.012347199767827988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,1,128,1,fp8,fp8,0,0.012337599694728852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,2,128,1,float16,float16,0,0.011958400160074234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,2,128,1,float16,fp8,0,0.012321600317955017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,1,128,1,float16,fp8,0,0.017951999604701997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,2,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,4,128,1,float16,float16,0,0.011952000111341477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,4,128,1,fp8,fp8,0,0.01231200024485588
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,12,128,1,float16,float16,0,0.012267199903726577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,12,2,128,1,fp8,fp8,0,0.017851200699806214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,12,128,1,float16,fp8,0,0.012436799705028534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,1,128,1,float16,float16,0,0.011675199866294861
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,1,128,1,float16,fp8,0,0.0119439996778965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,1,128,1,fp8,fp8,0,0.012139199674129486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,12,1,128,1,float16,float16,0,0.012299200147390365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,2,128,1,float16,float16,0,0.011798399686813354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,2,128,1,fp8,fp8,0,0.012129600346088409
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,2,128,1,float16,fp8,0,0.011923199892044068
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,12,2,128,1,float16,fp8,0,0.04394400119781494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,4,128,1,float16,float16,0,0.01175839975476265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,4,128,1,float16,fp8,0,0.012055999785661697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,4,128,1,fp8,fp8,0,0.01191840022802353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,12,1,128,1,float16,float16,0,0.08223519921302795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,12,1,128,1,fp8,fp8,0,0.08324800133705139
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,12,2,128,1,float16,float16,0,0.08459200263023377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,12,2,128,1,float16,fp8,0,0.0841488003730774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,12,2,128,1,fp8,fp8,0,0.08380320072174072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,12,4,128,1,float16,float16,0,0.10811519622802734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,2,128,1,float16,float16,0,0.012163200229406358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,12,4,128,1,float16,fp8,0,0.10470080375671387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,12,128,1,float16,float16,0,0.08273119926452636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,12,128,1,float16,fp8,0,0.06476640105247497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,12,128,1,fp8,fp8,0,0.06419039964675903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,1,128,1,float16,float16,0,0.04689440131187439
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,1,128,1,float16,fp8,0,0.04301440119743347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,1,128,1,fp8,fp8,0,0.043635201454162595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,2,128,1,float16,float16,0,0.04821600019931793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,2,128,1,float16,fp8,0,0.045505601167678836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,2,128,1,fp8,fp8,0,0.04367679953575134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,4,128,1,float16,float16,0,0.05967680215835571
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,4,128,1,float16,fp8,0,0.05664799809455871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,12,4,128,1,fp8,fp8,0,0.0565775990486145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,12,128,1,float16,float16,0,0.0443807989358902
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,12,128,1,float16,fp8,0,0.03263840079307556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,12,128,1,fp8,fp8,0,0.0335312008857727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,1,128,1,float16,float16,0,0.02346719950437546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,1,128,1,float16,fp8,0,0.024326400458812715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,1,128,1,fp8,fp8,0,0.02448800057172775
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,2,128,1,float16,float16,0,0.023473599553108217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,2,128,1,float16,fp8,0,0.024447999894618988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,2,128,1,fp8,fp8,0,0.024371199309825897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,4,128,1,float16,float16,0,0.03125280141830444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,4,128,1,float16,fp8,0,0.028590399026870727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,12,4,128,1,fp8,fp8,0,0.028697600960731505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,12,128,1,float16,float16,0,0.02104640007019043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,12,128,1,float16,fp8,0,0.01831520050764084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,12,128,1,fp8,fp8,0,0.018118399381637573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,12,12,128,1,fp8,fp8,0,0.012604799866676331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,1,128,1,float16,fp8,0,0.018011200428009033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,1,128,1,fp8,fp8,0,0.01820639967918396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,2,128,1,float16,fp8,0,0.01817920058965683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,2,128,1,fp8,fp8,0,0.01820160001516342
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,4,128,1,float16,float16,0,0.01826079934835434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,4,128,1,float16,fp8,0,0.01846559941768646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,4,128,1,fp8,fp8,0,0.018513600528240203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,12,1,128,1,float16,fp8,0,0.0837552011013031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,12,128,1,float16,float16,0,0.015167999267578124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,12,128,1,fp8,fp8,0,0.013998399674892425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,1,128,1,float16,float16,0,0.012724800407886505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,12,1,128,1,fp8,fp8,0,0.012432000041007996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,1,128,1,float16,fp8,0,0.012676799297332763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,1,128,1,fp8,fp8,0,0.01265919953584671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,2,128,1,float16,float16,0,0.012720000743865967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,12,4,128,1,fp8,fp8,0,0.10541280508041381
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,2,128,1,float16,fp8,0,0.012656000256538392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,2,128,1,fp8,fp8,0,0.012937599420547485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,4,128,1,float16,fp8,0,0.012856000661849975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,4,128,1,fp8,fp8,0,0.0130048006772995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,12,128,1,float16,float16,0,0.011631999909877778
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,12,128,1,float16,fp8,0,0.01091040000319481
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,12,128,1,fp8,fp8,0,0.01098880022764206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,1,128,1,float16,float16,0,0.012417600303888322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,1,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,1,128,1,fp8,fp8,0,0.01268640011548996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,2,128,1,float16,float16,0,0.01252799928188324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,2,128,1,float16,fp8,0,0.012644800543785095
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,2,128,1,fp8,fp8,0,0.01268800050020218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,4,128,1,float16,float16,0,0.012377600371837615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,4,128,1,float16,fp8,0,0.012598399817943574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,12,4,128,1,fp8,fp8,0,0.012520000338554382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,12,128,1,float16,float16,0,0.010662399977445603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,12,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,12,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,1,128,1,float16,float16,0,0.011844799667596818
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,1,128,1,float16,fp8,0,0.012268800288438797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,1,128,1,fp8,fp8,0,0.012294399738311767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,2,128,1,float16,float16,0,0.011928000301122666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,2,128,1,float16,fp8,0,0.012383999675512314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,4,128,1,float16,float16,0,0.012123200297355651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,12,4,128,1,float16,fp8,0,0.012273599952459335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,4,128,1,fp8,fp8,0,0.012252800166606903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,12,128,1,float16,float16,0,0.010761599987745285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,12,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,1,128,1,float16,float16,0,0.01759999990463257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,1,128,1,float16,float16,0,0.011643200367689132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,12,2,128,1,float16,float16,0,0.017643199861049653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,1,128,1,float16,fp8,0,0.012104000151157378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,2,128,1,float16,float16,0,0.01185920014977455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,1,128,1,fp8,fp8,0,0.012065599858760833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,2,128,1,float16,fp8,0,0.01223199963569641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,2,128,1,fp8,fp8,0,0.011921600252389909
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,4,128,1,float16,float16,0,0.01189119964838028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,4,128,1,float16,fp8,0,0.011955200135707856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,4,128,1,fp8,fp8,0,0.012148799747228623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,12,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,12,128,1,float16,fp8,0,0.01024319976568222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,12,128,1,fp8,fp8,0,0.010103999823331832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,1,128,1,float16,float16,0,0.011540800333023071
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,1,128,1,float16,fp8,0,0.011958400160074234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,1,128,1,fp8,fp8,0,0.011790399998426437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,2,128,1,float16,float16,0,0.01167680025100708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,2,128,1,float16,fp8,0,0.01180799975991249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,2,128,1,fp8,fp8,0,0.01202080026268959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,4,128,1,float16,float16,0,0.011515200138092041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,4,128,1,float16,fp8,0,0.011804799735546111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,12,4,128,1,fp8,fp8,0,0.011801599711179733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,12,1,128,1,float16,float16,0,0.04711360037326813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,12,1,128,1,float16,fp8,0,0.04454559981822968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,12,1,128,1,fp8,fp8,0,0.044924798607826236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,12,2,128,1,float16,float16,0,0.056862401962280276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,12,2,128,1,float16,fp8,0,0.05460000038146973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,12,2,128,1,fp8,fp8,0,0.05470079779624939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,12,4,128,1,float16,float16,0,0.06137279868125915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,12,4,128,1,float16,fp8,0,0.05827040076255798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,12,4,128,1,fp8,fp8,0,0.05854079723358154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,12,128,1,float16,float16,0,0.060305601358413695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,12,128,1,float16,fp8,0,0.05120800137519836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,12,128,1,fp8,fp8,0,0.05132960081100464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,1,128,1,float16,float16,0,0.02415039986371994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,2,128,1,fp8,fp8,0,0.01220960021018982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,1,128,1,float16,fp8,0,0.025488001108169556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,1,128,1,fp8,fp8,0,0.025494399666786193
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,2,128,1,float16,float16,0,0.028086400032043456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,12,4,128,1,float16,fp8,0,0.012398400157690049
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,2,128,1,fp8,fp8,0,0.02963840067386627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,4,128,1,float16,float16,0,0.030398398637771606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,4,128,1,float16,fp8,0,0.02958720028400421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,12,128,1,float16,float16,0,0.029311999678611755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,12,128,1,float16,fp8,0,0.027184000611305235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,12,128,1,fp8,fp8,0,0.027134400606155396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,1,128,1,float16,float16,0,0.017785599827766417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,1,128,1,float16,fp8,0,0.01871519982814789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,12,12,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,1,128,1,fp8,fp8,0,0.018753600120544434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,2,128,1,float16,float16,0,0.017990399897098542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,2,128,1,float16,fp8,0,0.018745599687099455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,2,128,1,fp8,fp8,0,0.018569600582122803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,4,128,1,float16,float16,0,0.01854880005121231
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,4,128,1,float16,fp8,0,0.018731200695037843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,12,4,128,1,fp8,fp8,0,0.018723200261592864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,12,128,1,float16,float16,0,0.018456000089645385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,12,128,1,float16,fp8,0,0.017211200296878816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,12,128,1,fp8,fp8,0,0.0172447994351387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,1,128,1,float16,fp8,0,0.013199999928474426
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,1,128,1,fp8,fp8,0,0.013187199831008911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,2,128,1,float16,float16,0,0.012825599312782288
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,2,128,1,float16,fp8,0,0.01334560066461563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,2,128,1,fp8,fp8,0,0.013129599392414093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,4,128,1,float16,float16,0,0.013196800649166108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,4,128,1,float16,fp8,0,0.013228799402713775
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,4,128,1,fp8,fp8,0,0.013393600285053254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,12,128,1,float16,fp8,0,0.014017599821090698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,12,128,1,float16,fp8,0,0.013407999277114868
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,1,128,1,float16,float16,0,0.012099199742078782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,1,128,1,float16,fp8,0,0.012478400021791458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,1,128,1,fp8,fp8,0,0.012630400061607362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,2,128,1,float16,float16,0,0.01226079985499382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,12,4,128,1,float16,float16,0,0.012870399653911591
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,2,128,1,float16,fp8,0,0.012544000148773193
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,2,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,4,128,1,float16,float16,0,0.012068799883127212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,4,128,1,float16,fp8,0,0.012387199699878693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,4,128,1,fp8,fp8,0,0.01239520013332367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,12,128,1,float16,float16,0,0.011095999926328658
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,12,128,1,float16,fp8,0,0.010726399719715118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,1,128,1,float16,float16,0,0.011988800019025803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,1,128,1,float16,fp8,0,0.01242400035262108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,1,128,1,fp8,fp8,0,0.012432000041007996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,2,128,1,float16,float16,0,0.011964800208806992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,2,128,1,float16,fp8,0,0.012507200241088867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,2,128,1,fp8,fp8,0,0.012444800138473511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,4,128,1,float16,float16,0,0.012031999975442886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,4,128,1,float16,fp8,0,0.01231200024485588
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,2,128,1,float16,fp8,0,0.029716798663139345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,4,128,1,fp8,fp8,0,0.012281599640846252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,12,128,1,float16,float16,0,0.01085280030965805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,12,128,1,float16,fp8,0,0.010304000228643417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,12,4,128,1,fp8,fp8,0,0.02961440086364746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,12,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,1,128,1,float16,float16,0,0.011638399958610535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,1,128,1,fp8,fp8,0,0.01204800009727478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,2,128,1,float16,float16,0,0.011699199676513672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,2,128,1,float16,fp8,0,0.012300799787044524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,2,128,1,fp8,fp8,0,0.012028799951076507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,4,128,1,float16,float16,0,0.011761599779129028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,4,128,1,float16,fp8,0,0.01199359968304634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,4,128,1,fp8,fp8,0,0.012059199810028075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,12,128,1,float16,float16,0,0.010611200332641601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,12,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,12,128,1,fp8,fp8,0,0.01053439974784851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,1,128,1,float16,float16,0,0.011657600104808808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,1,128,1,float16,fp8,0,0.011721599847078323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,1,128,1,fp8,fp8,0,0.01191840022802353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,2,128,1,float16,float16,0,0.01151840016245842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,2,128,1,float16,fp8,0,0.01188800036907196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,2,128,1,fp8,fp8,0,0.011856000125408172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,4,128,1,float16,float16,0,0.01141280010342598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,4,128,1,float16,fp8,0,0.01178240031003952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,12,4,128,1,fp8,fp8,0,0.011699199676513672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,12,128,1,float16,float16,0,0.01024480015039444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,12,128,1,float16,fp8,0,0.010113599896430969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,12,128,1,fp8,fp8,0,0.010236799716949463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,12,128,1,float16,float16,0,0.013793599605560303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,1,128,1,float16,float16,0,0.011503999680280685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,1,128,1,float16,fp8,0,0.011919999867677689
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,2,128,1,float16,float16,0,0.01154239997267723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,2,128,1,float16,fp8,0,0.011767999827861786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,2,128,1,fp8,fp8,0,0.011950399726629257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,4,128,1,float16,float16,0,0.011163199692964554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,4,128,1,float16,fp8,0,0.011840000003576278
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,4,128,1,fp8,fp8,0,0.011459200084209442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,12,1,128,1,float16,float16,0,0.029782399535179138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,12,1,128,1,float16,fp8,0,0.03176479935646057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,12,1,128,1,fp8,fp8,0,0.03166559934616089
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,12,2,128,1,float16,float16,0,0.029702401161193846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,12,2,128,1,float16,fp8,0,0.03152959942817688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,12,12,128,1,fp8,fp8,0,0.010931199789047242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,12,2,128,1,fp8,fp8,0,0.03156160116195679
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,12,4,128,1,float16,float16,0,0.03863359987735748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,12,4,128,1,fp8,fp8,0,0.03619360029697418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,12,128,1,float16,float16,0,0.047417598962783816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,12,128,1,float16,fp8,0,0.04475519955158234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,12,128,1,fp8,fp8,0,0.04491840004920959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,1,128,1,float16,float16,0,0.01897920072078705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,1,128,1,float16,fp8,0,0.01969279944896698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,1,128,1,fp8,fp8,0,0.020177599787712098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,2,128,1,float16,float16,0,0.018824000656604768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,2,128,1,float16,fp8,0,0.019860799610614776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,2,128,1,fp8,fp8,0,0.019801600277423857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,4,128,1,float16,float16,0,0.02253119945526123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,4,128,1,float16,fp8,0,0.021590399742126464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,12,128,1,float16,float16,0,0.027187201380729675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,12,4,128,1,fp8,fp8,0,0.021939200162887574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,12,128,1,float16,fp8,0,0.026051199436187743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,12,128,1,fp8,fp8,0,0.02640160024166107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,1,128,1,float16,fp8,0,0.014105600118637086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,1,128,1,fp8,fp8,0,0.014256000518798828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,12,1,128,1,float16,float16,0,0.01310880035161972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,2,128,1,float16,float16,0,0.013799999654293061
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,2,128,1,float16,fp8,0,0.013972799479961395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,4,128,1,float16,float16,0,0.015302400290966033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,4,128,1,float16,fp8,0,0.014428800344467163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,4,128,1,fp8,fp8,0,0.01465280055999756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,12,12,128,1,fp8,fp8,0,0.013433599472045898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,12,128,1,float16,float16,0,0.017185600101947786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,12,128,1,float16,fp8,0,0.016704000532627106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,1,128,1,float16,float16,0,0.01244800016283989
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,1,128,1,float16,fp8,0,0.012718400359153748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,1,128,1,fp8,fp8,0,0.012918399274349212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,2,128,1,float16,float16,0,0.012540799379348756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,2,128,1,float16,fp8,0,0.012913599610328674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,2,128,1,fp8,fp8,0,0.012985600531101227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,12,4,128,1,float16,fp8,0,0.03631680011749268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,4,128,1,float16,float16,0,0.011224000155925751
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,4,128,1,float16,fp8,0,0.01106560006737709
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,12,128,1,float16,float16,0,0.013607999682426453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,12,128,1,float16,fp8,0,0.013264000415802002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,12,128,1,fp8,fp8,0,0.013102400302886962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,1,128,1,float16,float16,0,0.01194240003824234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,1,128,1,float16,fp8,0,0.012371200323104858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,1,128,1,fp8,fp8,0,0.012348800152540206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,2,128,1,float16,float16,0,0.011828800290822982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,12,1,128,1,float16,fp8,0,0.012164799869060517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,2,128,1,float16,fp8,0,0.012280000001192093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,2,128,1,fp8,fp8,0,0.012303999811410903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,1,128,1,float16,float16,0,0.013251200318336487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,4,128,1,float16,float16,0,0.010513599961996078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,4,128,1,float16,fp8,0,0.010238400101661682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,12,4,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,12,128,1,float16,fp8,0,0.010900799930095673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,12,128,1,fp8,fp8,0,0.010678400099277497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,1,128,1,float16,float16,0,0.011766400188207626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,1,128,1,float16,fp8,0,0.012305600196123123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,2,128,1,float16,float16,0,0.011849600076675414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,2,128,1,float16,fp8,0,0.012142399698495865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,12,2,128,1,fp8,fp8,0,0.014190399646759033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,4,128,1,float16,float16,0,0.010183999687433243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,4,128,1,float16,fp8,0,0.009884800016880035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,12,1,128,1,fp8,fp8,0,0.011902400106191636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,4,128,1,fp8,fp8,0,0.010073599964380264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,12,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,12,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,12,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,12,128,1,fp8,fp8,0,0.016631999611854555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,1,128,1,float16,float16,0,0.011670400202274323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,1,128,1,float16,fp8,0,0.011964800208806992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,1,128,1,fp8,fp8,0,0.011737599968910217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,2,128,1,float16,float16,0,0.011531200259923935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,2,128,1,fp8,fp8,0,0.01170239970088005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,4,128,1,float16,float16,0,0.009860800206661224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,4,128,1,fp8,fp8,0,0.009697599709033966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,12,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,12,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,12,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,1,128,1,float16,fp8,0,0.011865600198507308
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,1,128,1,fp8,fp8,0,0.011876799911260606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,12,4,128,1,fp8,fp8,0,0.010830400139093399
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,2,128,1,float16,fp8,0,0.011609599739313126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,2,128,1,fp8,fp8,0,0.011601600050926208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,4,128,1,float16,float16,0,0.009668800234794616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,4,128,1,float16,fp8,0,0.009446399658918381
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,4,128,1,fp8,fp8,0,0.009585600346326828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,12,128,1,float16,float16,0,0.01029599979519844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,12,128,1,float16,fp8,0,0.009987200051546097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,12,128,1,fp8,fp8,0,0.00997759997844696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,1,128,1,float16,float16,0,0.011470399796962738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,12,128,1,float16,float16,0,0.010952000319957734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,1,128,1,float16,fp8,0,0.011720000207424164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,1,128,1,fp8,fp8,0,0.011713600158691407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,1,128,1,fp8,fp8,0,0.012164799869060517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,2,128,1,float16,fp8,0,0.011553599685430526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,2,128,1,fp8,fp8,0,0.011615999788045884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,4,128,1,float16,float16,0,0.00981760025024414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,12,2,128,1,fp8,fp8,0,0.012171199917793274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,4,128,1,fp8,fp8,0,0.009347199648618697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,12,1,128,1,float16,float16,0,0.017900800704956053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,12,1,128,1,float16,fp8,0,0.017744000256061553
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,12,1,128,1,fp8,fp8,0,0.017817600071430205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,12,2,128,1,float16,float16,0,0.02351360023021698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,12,2,128,1,float16,fp8,0,0.02337439954280853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,2,128,1,float16,fp8,0,0.011854399740695954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,12,2,128,1,fp8,fp8,0,0.023372800648212434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,12,4,128,1,float16,float16,0,0.03478400111198425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,12,4,128,1,float16,fp8,0,0.009875199943780898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,12,4,128,1,fp8,fp8,0,0.03466080129146576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,12,128,1,float16,float16,0,0.043772798776626584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,12,128,1,float16,fp8,0,0.04378080070018768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,1,128,1,float16,float16,0,0.011372800171375274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,12,128,1,fp8,fp8,0,0.04323680102825165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,1,128,1,float16,fp8,0,0.012532800436019897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,1,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,2,128,1,float16,float16,0,0.01528320014476776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,12,2,128,1,float16,float16,0,0.011336000263690948
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,2,128,1,fp8,fp8,0,0.015142400562763215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,4,128,1,float16,float16,0,0.02104640007019043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,4,128,1,fp8,fp8,0,0.02096800059080124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,12,128,1,float16,float16,0,0.025732800364494324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,12,128,1,float16,fp8,0,0.025380799174308778
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,12,128,1,fp8,fp8,0,0.025383999943733214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,1,128,1,float16,float16,0,0.011400000005960465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,1,128,1,float16,fp8,0,0.011313600093126297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,1,128,1,fp8,fp8,0,0.011448000371456147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,2,128,1,float16,float16,0,0.011577600240707397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,2,128,1,float16,fp8,0,0.011540800333023071
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,2,128,1,fp8,fp8,0,0.011566399782896041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,2,128,1,float16,float16,0,0.011105599999427795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,4,128,1,float16,float16,0,0.0141744002699852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,4,128,1,fp8,fp8,0,0.013935999572277069
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,12,128,1,float16,float16,0,0.016564799845218657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,12,4,128,1,float16,fp8,0,0.00933919996023178
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,12,128,1,float16,fp8,0,0.016451199352741242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,12,128,1,fp8,fp8,0,0.01635040044784546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,1,128,1,float16,fp8,0,0.010283199697732925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,1,128,1,fp8,fp8,0,0.010308799892663955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,2,128,1,float16,float16,0,0.010654400289058685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,2,128,1,float16,fp8,0,0.010694400221109391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,2,128,1,fp8,fp8,0,0.010662399977445603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,4,128,1,float16,float16,0,0.010689599812030793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,4,128,1,float16,fp8,0,0.010744000226259232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,12,4,128,1,float16,fp8,0,0.03463039994239807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,4,128,1,fp8,fp8,0,0.01069760024547577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,12,128,1,float16,float16,0,0.013079999387264252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,12,128,1,fp8,fp8,0,0.01289760023355484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,1,128,1,float16,float16,0,0.012932799756526947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,1,128,1,float16,fp8,0,0.00971359983086586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,2,128,1,float16,fp8,0,0.015369600057601929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,2,128,1,float16,float16,0,0.010300800204277039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,12,4,128,1,float16,fp8,0,0.020916800200939178
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,2,128,1,float16,fp8,0,0.010091199725866317
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,2,128,1,fp8,fp8,0,0.00997920036315918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,4,128,1,float16,fp8,0,0.010022400319576264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,4,128,1,fp8,fp8,0,0.01005600020289421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,12,128,1,float16,float16,0,0.010665600001811982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,12,128,1,float16,fp8,0,0.01064639985561371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,12,128,1,fp8,fp8,0,0.010737600177526474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,1,128,1,float16,fp8,0,0.010147199779748917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,1,128,1,fp8,fp8,0,0.010129600018262862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,2,128,1,float16,float16,0,0.010012800246477127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,2,128,1,float16,fp8,0,0.009878399968147277
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,2,128,1,fp8,fp8,0,0.009732799977064133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,4,128,1,float16,float16,0,0.009969600290060044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,12,4,128,1,float16,fp8,0,0.013873599469661713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,4,128,1,float16,fp8,0,0.009948799759149552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,4,128,1,fp8,fp8,0,0.009950400143861771
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,12,128,1,float16,fp8,0,0.01008479967713356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,12,128,1,fp8,fp8,0,0.010116799920797347
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,12,1,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,1,128,1,float16,float16,0,0.009625600278377533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,1,128,1,fp8,fp8,0,0.009524799883365631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,2,128,1,float16,float16,0,0.009691199660301209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,2,128,1,float16,fp8,0,0.009483200311660767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,2,128,1,fp8,fp8,0,0.009471999853849411
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,4,128,1,float16,float16,0,0.00968799963593483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,4,128,1,float16,fp8,0,0.00960479974746704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,4,128,1,fp8,fp8,0,0.009591999650001525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,12,128,1,float16,float16,0,0.010260800272226334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,12,128,1,float16,fp8,0,0.010267200320959092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,12,128,1,float16,fp8,0,0.01284639984369278
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,12,128,1,fp8,fp8,0,0.010131199657917023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,1,128,1,float16,float16,0,0.009774400293827057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,1,128,1,float16,float16,0,0.009769599884748459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,1,128,1,fp8,fp8,0,0.009567999839782714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,2,128,1,float16,float16,0,0.009620799869298934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,2,128,1,float16,fp8,0,0.009459199756383896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,2,128,1,fp8,fp8,0,0.009374400228261947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,4,128,1,float16,float16,0,0.009737599641084671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,4,128,1,float16,fp8,0,0.009588800370693207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,12,128,1,float16,float16,0,0.010076799988746643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,4,128,1,float16,float16,0,0.01029599979519844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,12,128,1,fp8,fp8,0,0.009913600236177444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,1,128,1,float16,float16,0,0.00960479974746704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,12,1,128,1,float16,float16,0,0.009788800030946732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,1,128,1,float16,fp8,0,0.009548799693584442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,1,128,1,fp8,fp8,0,0.00952799990773201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,2,128,1,float16,float16,0,0.009518399834632874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,2,128,1,float16,fp8,0,0.009326399862766266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,2,128,1,fp8,fp8,0,0.009350399672985076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,4,128,1,float16,float16,0,0.00936639979481697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,4,128,1,float16,fp8,0,0.009321600198745728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,4,128,1,fp8,fp8,0,0.009284800291061402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,1,128,1,float16,fp8,0,0.009307199716567993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,1,128,1,float16,fp8,0,0.009483200311660767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,12,1,128,1,fp8,fp8,0,0.009694399684667588
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,12,4,128,1,fp8,fp8,0,0.009494400024414063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,12,12,128,1,float16,fp8,0,0.009822399914264679
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,12,12,128,1,float16,float16,0,0.010329599678516387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,8,1,128,1,float16,fp8,0,5.948980712890625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,8,1,128,1,fp8,fp8,0,5.913449478149414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,8,2,128,1,float16,fp8,0,5.867529678344726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,8,2,128,1,fp8,fp8,0,5.884803390502929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,8,2,128,1,float16,float16,0,7.614984130859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,8,128,1,float16,fp8,0,3.0104032516479493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,8,1,128,1,float16,float16,0,7.519158172607422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,8,4,128,1,float16,float16,0,7.511360168457031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,8,128,1,fp8,fp8,0,3.0064319610595702
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,8,4,128,1,float16,fp8,0,5.9574542999267575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,8,4,128,1,fp8,fp8,0,6.094895935058593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,1,128,1,float16,float16,0,3.726988983154297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,1,128,1,float16,fp8,0,2.9257503509521485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,1,128,1,fp8,fp8,0,2.9085968017578123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,2,128,1,float16,fp8,0,2.9339487075805666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,2,128,1,fp8,fp8,0,2.957439994812012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,8,128,1,float16,float16,0,3.5992305755615233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,2,128,1,float16,float16,0,3.735627365112305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,4,128,1,float16,float16,0,3.558950424194336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,8,128,1,float16,fp8,0,1.5350655555725097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,8,128,1,fp8,fp8,0,1.5101327896118164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,4,128,1,float16,fp8,0,3.0946304321289064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,8,128,1,float16,float16,0,1.776852798461914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,1,128,1,float16,fp8,0,1.556163215637207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,1,128,1,float16,float16,0,1.7271999359130858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,8,4,128,1,fp8,fp8,0,2.9494943618774414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,2,128,1,float16,fp8,0,1.478452777862549
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,1,128,1,fp8,fp8,0,1.7098896026611328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,2,128,1,float16,float16,0,1.6821151733398438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,2,128,1,fp8,fp8,0,1.4834063529968262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,4,128,1,float16,float16,0,1.9084447860717773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,4,128,1,float16,fp8,0,1.4873135566711426
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,8,4,128,1,fp8,fp8,0,1.6708608627319337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,1,128,1,float16,fp8,0,0.7488160133361816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,8,128,1,float16,float16,0,0.8711343765258789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,1,128,1,float16,float16,0,0.8930144309997559
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,8,128,1,fp8,fp8,0,0.8088080406188964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,2,128,1,float16,float16,0,0.8507136344909668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,1,128,1,fp8,fp8,0,0.8805279731750488
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,2,128,1,float16,fp8,0,0.7748528003692627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,2,128,1,fp8,fp8,0,0.7401599884033203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,4,128,1,fp8,fp8,0,0.7570911884307862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,4,128,1,float16,fp8,0,0.7977104187011719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,4,128,1,float16,float16,0,0.8576064109802246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,8,1,128,1,float16,fp8,0,3.443979263305664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,8,1,128,1,float16,float16,0,4.216664123535156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,8,1,128,1,fp8,fp8,0,3.3375904083251955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,8,2,128,1,fp8,fp8,0,3.3302848815917967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,8,2,128,1,float16,fp8,0,3.368571090698242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,8,2,128,1,float16,float16,0,4.014284896850586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,8,4,128,1,float16,float16,0,4.196337509155273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,8,4,128,1,float16,fp8,0,3.4352336883544923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,8,128,1,float16,fp8,0,1.7377616882324218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,8,8,128,1,float16,fp8,0,0.7700431823730469
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,8,128,1,float16,float16,0,2.1089263916015626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,8,128,1,fp8,fp8,0,1.7718528747558593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,8,4,128,1,fp8,fp8,0,3.511414337158203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,1,128,1,float16,float16,0,2.08251838684082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,1,128,1,float16,fp8,0,1.8133167266845702
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,1,128,1,fp8,fp8,0,1.7039823532104492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,2,128,1,float16,fp8,0,1.6582880020141602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,2,128,1,float16,float16,0,1.9170000076293945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,2,128,1,fp8,fp8,0,1.8507631301879883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,4,128,1,float16,fp8,0,1.7040224075317383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,8,128,1,float16,fp8,0,0.9045040130615234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,4,128,1,fp8,fp8,0,1.842363166809082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,8,128,1,fp8,fp8,0,0.8743375778198242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,1,128,1,float16,float16,0,1.016708755493164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,1,128,1,float16,fp8,0,0.8664752006530761
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,1,128,1,fp8,fp8,0,0.8431488037109375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,2,128,1,float16,float16,0,0.982323169708252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,2,128,1,fp8,fp8,0,0.8340047836303711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,2,128,1,float16,fp8,0,0.8691776275634766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,4,128,1,float16,fp8,0,0.8654848098754883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,8,128,1,float16,fp8,0,0.45231199264526367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,4,128,1,fp8,fp8,0,0.8483967781066895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,8,128,1,fp8,fp8,0,0.4609327793121338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,4,128,1,float16,float16,0,1.0817567825317382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,1,128,1,float16,float16,0,0.49007840156555177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,1,128,1,fp8,fp8,0,0.42179198265075685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,2,128,1,float16,float16,0,0.4915616035461426
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,2,128,1,fp8,fp8,0,0.4444240093231201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,4,128,1,float16,fp8,0,0.4267744064331055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,4,128,1,float16,float16,0,0.5139488220214844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,4,128,1,fp8,fp8,0,0.44237117767333983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,8,8,128,1,float16,float16,0,1.0610272407531738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,8,4,128,1,float16,float16,0,2.0621856689453124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,8,1,128,1,float16,fp8,0,2.324407958984375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,8,128,1,float16,float16,0,0.5630847930908203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,8,1,128,1,float16,float16,0,2.915484809875488
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,1,128,1,float16,fp8,0,0.4218416213989258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,8,2,128,1,float16,fp8,0,0.4554719924926758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,8,2,128,1,float16,float16,0,2.891035270690918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,8,2,128,1,float16,fp8,0,2.4353567123413087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,8,2,128,1,fp8,fp8,0,2.342929649353027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,8,4,128,1,float16,fp8,0,2.3807199478149412
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,8,128,1,float16,float16,0,1.6286720275878905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,8,4,128,1,fp8,fp8,0,2.375494384765625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,8,4,128,1,float16,float16,0,3.0922239303588865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,8,128,1,fp8,fp8,0,1.2588208198547364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,8,128,1,float16,fp8,0,1.337003231048584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,8,1,128,1,fp8,fp8,0,2.3226415634155275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,1,128,1,float16,float16,0,1.3562000274658204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,1,128,1,float16,fp8,0,1.3213215827941895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,1,128,1,fp8,fp8,0,1.1806367874145507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,2,128,1,float16,fp8,0,1.2001744270324708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,2,128,1,float16,float16,0,1.4970751762390138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,2,128,1,fp8,fp8,0,1.196561622619629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,8,128,1,float16,float16,0,0.7594431877136231
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,4,128,1,float16,float16,0,1.3831263542175294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,4,128,1,float16,fp8,0,1.2507247924804688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,8,4,128,1,fp8,fp8,0,1.2556896209716797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,1,128,1,float16,float16,0,0.6804848194122315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,1,128,1,float16,fp8,0,0.6248767852783204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,1,128,1,fp8,fp8,0,0.5894735813140869
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,2,128,1,fp8,fp8,0,0.5956143856048584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,4,128,1,float16,float16,0,0.722049617767334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,4,128,1,float16,fp8,0,0.6212736129760742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,8,128,1,float16,float16,0,0.4107664108276367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,8,128,1,float16,fp8,0,0.3530992031097412
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,8,128,1,fp8,fp8,0,0.3363471984863281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,1,128,1,float16,float16,0,0.3508336067199707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,1,128,1,float16,fp8,0,0.3214031934738159
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,2,128,1,float16,float16,0,0.3612351894378662
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,2,128,1,float16,fp8,0,0.32106239795684816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,2,128,1,fp8,fp8,0,0.3110208034515381
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,4,128,1,float16,float16,0,0.3733760118484497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,4,128,1,float16,fp8,0,0.3138495922088623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,4,128,1,fp8,fp8,0,0.31426880359649656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,8,128,1,float16,fp8,0,0.6415840148925781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,8,128,1,fp8,fp8,0,0.7167088031768799
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,2,128,1,float16,float16,0,0.6967264175415039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,4,128,1,fp8,fp8,0,0.6114560127258301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,8,1,128,1,fp8,fp8,0,0.30891520977020265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,8,1,128,1,float16,fp8,0,3.044223976135254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,8,1,128,1,fp8,fp8,0,3.0157695770263673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,8,1,128,1,float16,float16,0,3.8377151489257812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,8,2,128,1,float16,float16,0,3.708652877807617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,8,2,128,1,float16,fp8,0,3.0490575790405274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,8,2,128,1,float16,fp8,0,0.5896192073822022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,8,2,128,1,fp8,fp8,0,3.0286720275878904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,8,128,1,float16,float16,0,2.0078367233276366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,8,4,128,1,float16,float16,0,3.753163146972656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,8,128,1,fp8,fp8,0,1.608505630493164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,8,4,128,1,float16,fp8,0,3.111819267272949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,1,128,1,float16,fp8,0,1.535267162322998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,1,128,1,float16,float16,0,1.8106191635131836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,1,128,1,fp8,fp8,0,1.6858175277709961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,2,128,1,float16,fp8,0,1.5288864135742188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,2,128,1,fp8,fp8,0,1.5538448333740233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,2,128,1,float16,float16,0,1.9017744064331055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,4,128,1,float16,float16,0,1.8498111724853517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,4,128,1,float16,fp8,0,1.6937952041625977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,8,128,1,float16,float16,0,0.928048038482666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,8,128,1,float16,fp8,0,0.812507152557373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,8,128,1,fp8,fp8,0,0.9131839752197266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,4,128,1,fp8,fp8,0,1.5895008087158202
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,1,128,1,float16,fp8,0,0.8045248031616211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,1,128,1,float16,float16,0,0.9821807861328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,1,128,1,fp8,fp8,0,0.8153856277465821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,8,8,128,1,float16,fp8,0,1.7500495910644531
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,2,128,1,float16,float16,0,0.8861408233642578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,2,128,1,float16,fp8,0,0.8049951553344726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,2,128,1,fp8,fp8,0,0.7876255989074707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,8,128,1,float16,fp8,0,0.4389984130859375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,8,4,128,1,fp8,fp8,0,3.1843072891235353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,8,128,1,fp8,fp8,0,0.41502881050109863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,4,128,1,float16,float16,0,0.9159248352050782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,4,128,1,float16,fp8,0,0.8512016296386719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,8,4,128,1,fp8,fp8,0,0.8091039657592773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,1,128,1,float16,float16,0,0.45085439682006834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,1,128,1,float16,fp8,0,0.41546077728271485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,1,128,1,fp8,fp8,0,0.3848335981369019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,2,128,1,float16,float16,0,0.44581918716430663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,2,128,1,fp8,fp8,0,0.40398879051208497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,4,128,1,float16,float16,0,0.4558576107025146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,8,128,1,float16,float16,0,0.24399199485778808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,8,128,1,float16,fp8,0,0.2096560001373291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,8,128,1,fp8,fp8,0,0.2144927978515625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,4,128,1,fp8,fp8,0,0.4116047859191895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,1,128,1,float16,float16,0,0.22448959350585937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,1,128,1,float16,fp8,0,0.19926079511642455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,1,128,1,fp8,fp8,0,0.20082240104675292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,2,128,1,float16,fp8,0,0.20176479816436768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,2,128,1,fp8,fp8,0,0.2010256052017212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,4,128,1,float16,fp8,0,0.20045759677886962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,4,128,1,float16,float16,0,0.2351775884628296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,8,1,128,1,float16,fp8,0,1.749496078491211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,8,128,1,float16,float16,0,0.4749760150909424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,8,1,128,1,float16,float16,0,2.0888864517211916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,2,128,1,float16,fp8,0,0.4323728084564209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,8,4,128,1,float16,fp8,0,0.40320801734924316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,2,128,1,float16,float16,0,0.2302095890045166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,8,4,128,1,fp8,fp8,0,0.20342400074005126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,8,1,128,1,fp8,fp8,0,1.7676336288452148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,8,2,128,1,float16,fp8,0,1.8804895401000976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,8,2,128,1,float16,float16,0,2.1005104064941404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,8,2,128,1,fp8,fp8,0,1.7460975646972656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,8,128,1,float16,float16,0,1.1152655601501464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,8,128,1,float16,fp8,0,0.9567968368530273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,8,4,128,1,float16,float16,0,2.1792831420898438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,8,4,128,1,float16,fp8,0,1.7960496902465821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,8,128,1,fp8,fp8,0,1.0669808387756348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,1,128,1,float16,float16,0,1.0264320373535156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,8,4,128,1,fp8,fp8,0,1.8138351440429688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,1,128,1,float16,fp8,0,0.8889167785644532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,1,128,1,fp8,fp8,0,0.8970208168029785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,2,128,1,float16,float16,0,1.0193903923034668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,2,128,1,float16,fp8,0,1.0306896209716796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,2,128,1,fp8,fp8,0,0.8840368270874024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,8,128,1,float16,fp8,0,0.4809455871582031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,8,128,1,float16,float16,0,0.5722095966339111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,4,128,1,float16,float16,0,1.1014911651611328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,8,128,1,fp8,fp8,0,0.5240799903869628
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,4,128,1,fp8,fp8,0,0.9147295951843262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,8,4,128,1,float16,fp8,0,0.9391247749328613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,1,128,1,float16,float16,0,0.5494544029235839
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,1,128,1,fp8,fp8,0,0.4481232166290283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,2,128,1,float16,float16,0,0.5158095836639405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,2,128,1,float16,fp8,0,0.47135038375854493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,4,128,1,float16,float16,0,0.5392352104187011
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,8,128,1,float16,float16,0,0.3042896032333374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,4,128,1,fp8,fp8,0,0.46755199432373046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,8,128,1,float16,fp8,0,0.25237441062927246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,8,128,1,fp8,fp8,0,0.2602672100067139
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,1,128,1,float16,float16,0,0.2634335994720459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,1,128,1,float16,fp8,0,0.22901439666748047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,1,128,1,fp8,fp8,0,0.23364479541778566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,2,128,1,float16,fp8,0,0.23663039207458497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,2,128,1,float16,float16,0,0.26586079597473145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,2,128,1,fp8,fp8,0,0.23086240291595458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,4,128,1,float16,float16,0,0.2766319990158081
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,8,128,1,float16,float16,0,0.16829919815063477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,4,128,1,fp8,fp8,0,0.23975679874420167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,8,128,1,fp8,fp8,0,0.13156640529632568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,1,128,1,float16,float16,0,0.14312479496002198
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,1,128,1,float16,fp8,0,0.12887040376663209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,1,128,1,fp8,fp8,0,0.12694079875946046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,2,128,1,float16,fp8,0,0.1285856008529663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,2,128,1,fp8,fp8,0,0.12673920392990112
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,4,128,1,float16,float16,0,0.15048480033874512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,4,128,1,fp8,fp8,0,0.12698719501495362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,1,128,1,float16,fp8,0,0.4484992027282715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,2,128,1,fp8,fp8,0,0.47738399505615237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,8,4,128,1,float16,fp8,0,0.47705440521240233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,8,1,128,1,float16,float16,0,1.8882335662841796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,8,128,1,float16,fp8,0,0.1323248028755188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,2,128,1,float16,float16,0,0.14448959827423097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,8,1,128,1,fp8,fp8,0,1.6499200820922852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,8,4,128,1,float16,fp8,0,0.1270303964614868
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,8,2,128,1,float16,fp8,0,1.7368080139160156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,8,2,128,1,fp8,fp8,0,1.647547149658203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,8,128,1,float16,float16,0,1.0827216148376464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,8,4,128,1,float16,float16,0,1.9657136917114257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,8,4,128,1,float16,fp8,0,1.7062368392944336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,8,128,1,float16,fp8,0,0.9106271743774415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,8,4,128,1,fp8,fp8,0,1.7059104919433594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,8,128,1,fp8,fp8,0,0.9561648368835449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,1,128,1,float16,fp8,0,0.8303999900817871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,8,4,128,1,float16,fp8,0,0.23926079273223877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,1,128,1,float16,float16,0,1.0426032066345214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,1,128,1,fp8,fp8,0,0.8359375953674316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,2,128,1,float16,float16,0,0.9556879997253418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,8,1,128,1,float16,fp8,0,1.6544240951538085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,2,128,1,float16,fp8,0,0.8439616203308106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,2,128,1,fp8,fp8,0,0.8315327644348145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,8,128,1,float16,float16,0,0.5606832027435302
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,8,128,1,float16,fp8,0,0.4523776054382324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,4,128,1,fp8,fp8,0,0.8650799751281738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,8,2,128,1,float16,float16,0,1.9319728851318358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,4,128,1,float16,float16,0,1.0006256103515625
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,8,128,1,fp8,fp8,0,0.45850720405578616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,1,128,1,float16,float16,0,0.47617759704589846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,1,128,1,float16,fp8,0,0.45648798942565916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,1,128,1,fp8,fp8,0,0.4243199825286865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,2,128,1,float16,float16,0,0.4809088230133057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,2,128,1,float16,fp8,0,0.42258081436157224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,4,128,1,float16,fp8,0,0.4361152172088623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,8,128,1,float16,fp8,0,0.23270719051361083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,8,128,1,fp8,fp8,0,0.23400640487670898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,4,128,1,fp8,fp8,0,0.45894880294799806
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,1,128,1,float16,fp8,0,0.21508800983428955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,1,128,1,fp8,fp8,0,0.21664960384368898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,2,128,1,float16,float16,0,0.24939680099487305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,2,128,1,float16,fp8,0,0.2149888038635254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,2,128,1,fp8,fp8,0,0.21664960384368898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,4,128,1,float16,float16,0,0.2583823919296265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,4,128,1,float16,fp8,0,0.22282559871673585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,4,128,1,fp8,fp8,0,0.22465438842773439
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,8,128,1,float16,float16,0,0.14353599548339843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,8,128,1,float16,fp8,0,0.122489595413208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,8,128,1,fp8,fp8,0,0.12259839773178101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,1,128,1,float16,float16,0,0.12500319480895997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,1,128,1,float16,fp8,0,0.11270079612731934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,1,128,1,fp8,fp8,0,0.11245440244674683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,2,128,1,float16,fp8,0,0.11190240383148194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,2,128,1,float16,float16,0,0.1290447950363159
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,2,128,1,fp8,fp8,0,0.1117136001586914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,4,128,1,float16,float16,0,0.13449280261993407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,4,128,1,float16,fp8,0,0.11371519565582275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,8,4,128,1,fp8,fp8,0,0.11402560472488403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,8,128,1,float16,float16,0,0.07482879757881164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,8,4,128,1,float16,fp8,0,0.933899211883545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,8,128,1,float16,fp8,0,0.06310880184173584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,8,128,1,fp8,fp8,0,0.06284800171852112
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,1,128,1,float16,fp8,0,0.06042720079421997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,1,128,1,fp8,fp8,0,0.060027199983596805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,2,128,1,float16,float16,0,0.06637439727783204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,2,128,1,fp8,fp8,0,0.06089760065078735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,2,128,1,fp8,fp8,0,0.4543759822845459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,4,128,1,float16,float16,0,0.06791520118713379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,4,128,1,float16,fp8,0,0.06047359704971313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,8,128,1,float16,float16,0,0.2752432107925415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,8,4,128,1,float16,float16,0,0.503115177154541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,4,128,1,fp8,fp8,0,0.06060640215873718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,8,1,128,1,float16,float16,0,0.24365921020507814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,8,1,128,1,float16,fp8,0,0.9843903541564941
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,8,1,128,1,float16,float16,0,1.112622356414795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,8,2,128,1,float16,float16,0,1.132367992401123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,8,2,128,1,float16,fp8,0,0.9776415824890137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,8,2,128,1,fp8,fp8,0,0.9851247787475585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,8,4,128,1,float16,float16,0,1.1973456382751464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,8,4,128,1,float16,fp8,0,1.0314800262451171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,8,4,128,1,fp8,fp8,0,1.0182928085327148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,1,128,1,float16,float16,0,0.06536319851875305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,8,128,1,float16,float16,0,0.6608992099761963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,8,2,128,1,float16,fp8,0,0.059825599193573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,8,128,1,float16,fp8,0,0.581332778930664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,8,128,1,fp8,fp8,0,0.5511199951171875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,1,128,1,float16,float16,0,0.5571712017059326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,1,128,1,fp8,fp8,0,0.4973296165466309
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,2,128,1,float16,float16,0,0.5697311878204345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,2,128,1,float16,fp8,0,0.506766414642334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,2,128,1,fp8,fp8,0,0.4982736110687256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,4,128,1,float16,float16,0,0.6115471839904785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,4,128,1,float16,fp8,0,0.5168896198272706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,8,128,1,float16,float16,0,0.3395440101623535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,8,128,1,float16,fp8,0,0.2790271997451782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,8,128,1,fp8,fp8,0,0.29218881130218505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,1,128,1,float16,float16,0,0.28751039505004883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,8,1,128,1,fp8,fp8,0,0.9844639778137207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,1,128,1,fp8,fp8,0,0.2545072078704834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,2,128,1,float16,float16,0,0.30054559707641604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,2,128,1,float16,fp8,0,0.2534703969955444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,2,128,1,fp8,fp8,0,0.2563024044036865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,4,128,1,float16,float16,0,0.30403039455413816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,4,128,1,float16,fp8,0,0.27507839202880857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,4,128,1,fp8,fp8,0,0.2624128103256226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,8,128,1,float16,float16,0,0.1794000029563904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,8,128,1,fp8,fp8,0,0.1475200057029724
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,1,128,1,float16,float16,0,0.14965120553970337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,1,128,1,float16,fp8,0,0.133297598361969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,1,128,1,fp8,fp8,0,0.1342319965362549
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,2,128,1,float16,float16,0,0.15263839960098266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,2,128,1,float16,fp8,0,0.1324512004852295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,2,128,1,fp8,fp8,0,0.13435360193252563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,4,128,1,float16,fp8,0,0.1371999979019165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,4,128,1,float16,float16,0,0.15774719715118407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,8,128,1,float16,float16,0,0.09726399779319764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,8,128,1,float16,fp8,0,0.07784479856491089
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,4,128,1,fp8,fp8,0,0.13755040168762206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,1,128,1,float16,float16,0,0.08058559894561768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,1,128,1,float16,fp8,0,0.07253760099411011
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,1,128,1,fp8,fp8,0,0.07252320051193237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,2,128,1,float16,float16,0,0.08187680244445801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,2,128,1,float16,fp8,0,0.07264639735221863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,2,128,1,fp8,fp8,0,0.07307519912719726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,4,128,1,float16,float16,0,0.08709759712219238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,4,128,1,float16,fp8,0,0.07204959988594055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,1,128,1,float16,fp8,0,0.49863038063049314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,8,128,1,float16,float16,0,0.054150402545928955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,8,128,1,fp8,fp8,0,0.047200000286102294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,1,128,1,float16,float16,0,0.04888800084590912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,1,128,1,float16,fp8,0,0.046137601137161255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,1,128,1,fp8,fp8,0,0.04580959975719452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,2,128,1,float16,float16,0,0.04930880069732666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,2,128,1,float16,fp8,0,0.04586400091648102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,2,128,1,fp8,fp8,0,0.04604479968547821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,4,128,1,float16,float16,0,0.04992319941520691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,4,128,1,float16,fp8,0,0.04612640142440796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,4,128,1,fp8,fp8,0,0.045660799741744994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,8,4,128,1,fp8,fp8,0,0.5198448181152344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,8,1,128,1,float16,fp8,0,0.25631840229034425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,8,1,128,1,float16,fp8,0,0.9638992309570312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,8,1,128,1,float16,float16,0,1.0751328468322754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,8,8,128,1,float16,fp8,0,0.14604320526123046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,8,2,128,1,float16,float16,0,1.1057408332824707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,8,128,1,fp8,fp8,0,0.07823839783668518
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,8,2,128,1,float16,fp8,0,0.9648880004882813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,8,2,128,1,fp8,fp8,0,0.974612808227539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,8,8,128,1,float16,fp8,0,0.04649440050125122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,8,4,128,1,float16,float16,0,1.1915535926818848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,8,4,128,1,float16,fp8,0,1.0163087844848633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,8,128,1,float16,float16,0,0.6621407985687255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,8,4,128,1,fp8,fp8,0,1.0141648292541503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,8,128,1,float16,fp8,0,0.5545455932617187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,8,128,1,fp8,fp8,0,0.5793056011199951
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,1,128,1,float16,float16,0,0.542628812789917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,1,128,1,float16,fp8,0,0.4959424018859863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,1,128,1,fp8,fp8,0,0.48915681838989256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,2,128,1,float16,float16,0,0.5587488174438476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,2,128,1,float16,fp8,0,0.5091887950897217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,2,128,1,fp8,fp8,0,0.489851188659668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,8,1,128,1,fp8,fp8,0,0.9695136070251464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,4,128,1,float16,float16,0,0.5947904109954834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,8,128,1,float16,float16,0,0.3393471956253052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,4,128,1,float16,fp8,0,0.5142831802368164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,8,4,128,1,fp8,fp8,0,0.5135087966918945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,8,128,1,float16,fp8,0,0.2838351964950562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,8,128,1,fp8,fp8,0,0.2847264051437378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,1,128,1,fp8,fp8,0,0.25167999267578123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,1,128,1,float16,float16,0,0.28817760944366455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,2,128,1,float16,float16,0,0.28516640663146975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,2,128,1,fp8,fp8,0,0.25258400440216067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,4,128,1,float16,float16,0,0.30010080337524414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,4,128,1,fp8,fp8,0,0.26273279190063475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,8,128,1,float16,float16,0,0.17433120012283326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,8,128,1,float16,fp8,0,0.14830559492111206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,8,128,1,fp8,fp8,0,0.1481503963470459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,1,128,1,float16,float16,0,0.14477920532226562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,1,128,1,float16,fp8,0,0.132151997089386
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,1,128,1,fp8,fp8,0,0.1318384051322937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,2,128,1,float16,float16,0,0.14833439588546754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,2,128,1,fp8,fp8,0,0.13158559799194336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,4,128,1,float16,float16,0,0.15703999996185303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,4,128,1,float16,fp8,0,0.13723039627075195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,4,128,1,fp8,fp8,0,0.13705919981002807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,8,128,1,float16,float16,0,0.0921504020690918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,8,128,1,float16,fp8,0,0.0791263997554779
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,8,128,1,fp8,fp8,0,0.07955840229988098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,1,128,1,float16,float16,0,0.07556639909744263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,1,128,1,float16,fp8,0,0.06939359903335571
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,1,128,1,fp8,fp8,0,0.06980320215225219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,2,128,1,float16,float16,0,0.07905279994010925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,2,128,1,float16,fp8,0,0.07024319767951966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,2,128,1,fp8,fp8,0,0.0693615972995758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,4,128,1,float16,float16,0,0.08350880146026611
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,4,128,1,float16,fp8,0,0.07085760235786438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,8,4,128,1,fp8,fp8,0,0.0715936005115509
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,8,128,1,float16,float16,0,0.05036479830741882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,8,128,1,float16,fp8,0,0.04186240136623383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,8,128,1,fp8,fp8,0,0.041440001130104064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,1,128,1,float16,float16,0,0.04186240136623383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,1,128,1,float16,fp8,0,0.03845280110836029
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,1,128,1,fp8,fp8,0,0.03893119990825653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,2,128,1,float16,float16,0,0.041371199488639834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,2,128,1,float16,fp8,0,0.03881120085716248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,2,128,1,fp8,fp8,0,0.03852320015430451
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,4,128,1,float16,float16,0,0.04306719899177551
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,1,128,1,float16,fp8,0,0.25461440086364745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,2,128,1,float16,fp8,0,0.25140159130096434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,4,128,1,fp8,fp8,0,0.03895359933376312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,8,4,128,1,float16,fp8,0,0.03840480148792267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,8,128,1,float16,float16,0,0.03781439960002899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,1,128,1,float16,float16,0,0.03524959981441498
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,8,4,128,1,float16,fp8,0,0.2734352111816406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,1,128,1,float16,fp8,0,0.033606401085853575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,2,128,1,float16,float16,0,0.03553119897842407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,2,128,1,fp8,fp8,0,0.033873599767684934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,2,128,1,float16,fp8,0,0.03391039967536926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,4,128,1,float16,fp8,0,0.03357599973678589
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,4,128,1,fp8,fp8,0,0.03391200006008148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,8,4,128,1,fp8,fp8,0,0.07259359955787659
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,8,2,128,1,float16,fp8,0,0.13235520124435424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,8,1,128,1,float16,fp8,0,0.6851071834564209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,8,1,128,1,float16,float16,0,0.742081594467163
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,8,1,128,1,fp8,fp8,0,0.678715181350708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,8,2,128,1,float16,float16,0,0.7498640060424805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,8,2,128,1,float16,fp8,0,0.6859280109405518
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,8,2,128,1,fp8,fp8,0,0.6826560020446777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,8,4,128,1,float16,float16,0,0.8087072372436523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,8,4,128,1,float16,fp8,0,0.7152880191802978
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,8,128,1,float16,float16,0,0.4584239959716797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,8,128,1,fp8,fp8,0,0.03396799862384796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,8,4,128,1,fp8,fp8,0,0.7219200134277344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,8,128,1,float16,fp8,0,0.3939039945602417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,8,128,1,fp8,fp8,0,0.393720006942749
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,4,128,1,float16,float16,0,0.03614400029182434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,1,128,1,float16,float16,0,0.3796207904815674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,2,128,1,float16,float16,0,0.3850640058517456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,1,128,1,fp8,fp8,0,0.34897599220275877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,2,128,1,float16,fp8,0,0.3464832067489624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,2,128,1,fp8,fp8,0,0.34583680629730223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,4,128,1,float16,float16,0,0.4108719825744629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,8,128,1,float16,float16,0,0.23636798858642577
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,4,128,1,float16,fp8,0,0.3657536029815674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,4,128,1,fp8,fp8,0,0.36727519035339357
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,8,128,1,fp8,fp8,0,0.20405919551849366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,1,128,1,float16,float16,0,0.19667199850082398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,1,128,1,float16,fp8,0,0.18050400018692017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,1,128,1,fp8,fp8,0,0.17957600355148315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,2,128,1,float16,fp8,0,0.1810655951499939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,2,128,1,fp8,fp8,0,0.1794271945953369
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,4,128,1,float16,float16,0,0.21256799697875978
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,4,128,1,float16,fp8,0,0.1881600022315979
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,4,128,1,fp8,fp8,0,0.19025440216064454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,8,128,1,float16,fp8,0,0.10889760255813599
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,8,128,1,float16,fp8,0,0.033888000249862674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,8,1,128,1,fp8,fp8,0,0.03351680040359497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,8,128,1,fp8,fp8,0,0.10845119953155517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,1,128,1,float16,float16,0,0.10308640003204346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,1,128,1,float16,fp8,0,0.09518880248069764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,1,128,1,fp8,fp8,0,0.09568639993667602
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,2,128,1,float16,float16,0,0.10567519664764405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,2,128,1,float16,fp8,0,0.09585760235786438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,2,128,1,fp8,fp8,0,0.09615359902381897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,4,128,1,float16,float16,0,0.11224000453948975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,4,128,1,float16,fp8,0,0.10009440183639526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,4,128,1,fp8,fp8,0,0.10039039850234985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,8,128,1,float16,float16,0,0.0686016023159027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,8,128,1,float16,fp8,0,0.05843999981880188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,8,128,1,fp8,fp8,0,0.05829280018806458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,1,128,1,float16,fp8,0,0.05156480073928833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,1,128,1,fp8,fp8,0,0.05154399871826172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,8,128,1,float16,fp8,0,0.2025520086288452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,2,128,1,float16,fp8,0,0.051795202493667605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,2,128,1,fp8,fp8,0,0.05176640152931213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,4,128,1,float16,float16,0,0.06271039843559265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,4,128,1,float16,fp8,0,0.05215200185775757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,4,128,1,fp8,fp8,0,0.05232160091400147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,8,128,1,float16,float16,0,0.03691839873790741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,8,128,1,float16,fp8,0,0.031744000315666196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,8,128,1,fp8,fp8,0,0.03155519962310791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,1,128,1,float16,float16,0,0.03234399855136871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,8,2,128,1,float16,float16,0,0.20083999633789062
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,1,128,1,float16,fp8,0,0.02990719974040985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,1,128,1,fp8,fp8,0,0.030134400725364684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,2,128,1,float16,float16,0,0.032513600587844846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,2,128,1,fp8,fp8,0,0.030350399017333985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,2,128,1,float16,fp8,0,0.030535998940467834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,4,128,1,float16,float16,0,0.033371201157569884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,4,128,1,float16,fp8,0,0.030924800038337707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,8,4,128,1,fp8,fp8,0,0.03075999915599823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,8,128,1,float16,float16,0,0.03737280070781708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,8,128,1,float16,fp8,0,0.03603520095348358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,8,128,1,fp8,fp8,0,0.03629119992256165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,1,128,1,float16,float16,0,0.03154560029506683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,1,128,1,float16,fp8,0,0.031065601110458373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,1,128,1,fp8,fp8,0,0.03179199993610382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,2,128,1,float16,float16,0,0.03443360030651092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,2,128,1,float16,fp8,0,0.033855998516082765
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,2,128,1,fp8,fp8,0,0.034030398726463316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,4,128,1,float16,float16,0,0.036236798763275145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,4,128,1,float16,fp8,0,0.035604798793792726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,8,4,128,1,fp8,fp8,0,0.03590080142021179
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,8,1,128,1,float16,fp8,0,0.3474463939666748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,8,1,128,1,float16,float16,0,0.7826672077178956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,8,1,128,1,float16,fp8,0,0.7417103767395019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,8,1,128,1,fp8,fp8,0,0.7397024154663085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,1,128,1,float16,float16,0,0.05568640232086182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,8,2,128,1,float16,float16,0,0.056987202167510985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,8,2,128,1,float16,float16,0,0.8039967536926269
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,8,2,128,1,fp8,fp8,0,0.7359024047851562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,8,2,128,1,float16,fp8,0,0.7368239879608154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,8,4,128,1,float16,float16,0,0.8708047866821289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,8,4,128,1,float16,fp8,0,0.7840511798858643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,8,128,1,float16,float16,0,0.5064703941345214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,8,4,128,1,fp8,fp8,0,0.783951997756958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,8,128,1,float16,fp8,0,0.4409520149230957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,1,128,1,float16,float16,0,0.40206079483032225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,8,128,1,fp8,fp8,0,0.44066081047058103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,1,128,1,float16,fp8,0,0.3784447908401489
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,2,128,1,float16,float16,0,0.41169118881225586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,8,8,128,1,float16,float16,0,0.12488640546798706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,4,128,1,float16,float16,0,0.44364161491394044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,8,128,1,float16,float16,0,0.26251840591430664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,4,128,1,fp8,fp8,0,0.4012335777282715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,8,128,1,float16,fp8,0,0.227891206741333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,8,128,1,fp8,fp8,0,0.22782559394836427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,1,128,1,float16,float16,0,0.2095263957977295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,1,128,1,float16,fp8,0,0.1965440034866333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,1,128,1,fp8,fp8,0,0.19562079906463622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,2,128,1,float16,float16,0,0.21451199054718018
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,2,128,1,float16,fp8,0,0.19647040367126464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,2,128,1,fp8,fp8,0,0.1968127965927124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,4,128,1,float16,float16,0,0.22967360019683838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,4,128,1,float16,fp8,0,0.20682721138000487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,8,4,128,1,fp8,fp8,0,0.20725760459899903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,8,128,1,float16,fp8,0,0.12175040245056153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,8,128,1,fp8,fp8,0,0.12189760208129882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,1,128,1,float16,float16,0,0.1121008038520813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,1,128,1,float16,fp8,0,0.10495200157165527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,1,128,1,fp8,fp8,0,0.10485600233078003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,2,128,1,float16,float16,0,0.11491680145263672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,2,128,1,float16,fp8,0,0.10522240400314331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,2,128,1,fp8,fp8,0,0.10519039630889893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,4,128,1,float16,float16,0,0.12291200160980224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,4,128,1,float16,fp8,0,0.11014080047607422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,4,128,1,fp8,fp8,0,0.11068960428237914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,8,128,1,float16,float16,0,0.07690879702568054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,8,128,1,float16,fp8,0,0.06915040016174316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,8,128,1,fp8,fp8,0,0.06780319809913635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,1,128,1,float16,float16,0,0.061862397193908694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,1,128,1,float16,fp8,0,0.05768479704856873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,1,128,1,fp8,fp8,0,0.05808479785919189
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,2,128,1,float16,float16,0,0.06452320218086242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,2,128,1,float16,fp8,0,0.05825279951095581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,1,128,1,fp8,fp8,0,0.3769551992416382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,4,128,1,float16,float16,0,0.06893919706344605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,2,128,1,float16,fp8,0,0.376691198348999
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,4,128,1,float16,fp8,0,0.060436797142028806
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,4,128,1,float16,fp8,0,0.39864161014556887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,8,128,1,float16,float16,0,0.04651359915733337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,8,128,1,fp8,fp8,0,0.03771840035915375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,1,128,1,float16,float16,0,0.03732640147209167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,1,128,1,float16,fp8,0,0.036111998558044436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,1,128,1,fp8,fp8,0,0.035504001379013064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,2,128,1,float16,float16,0,0.03781920075416565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,2,128,1,float16,fp8,0,0.03569119870662689
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,2,128,1,fp8,fp8,0,0.0361407995223999
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,4,128,1,float16,float16,0,0.03889760076999664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,4,128,1,float16,fp8,0,0.03640480041503906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,4,128,1,fp8,fp8,0,0.03590080142021179
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,8,128,1,float16,float16,0,0.0270224004983902
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,8,128,1,float16,fp8,0,0.025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,8,128,1,fp8,fp8,0,0.025286400318145753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,1,128,1,float16,float16,0,0.024667200446128846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,1,128,1,float16,fp8,0,0.024299199879169463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,8,8,128,1,float16,float16,0,0.13941760063171388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,1,128,1,fp8,fp8,0,0.02401279956102371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,2,128,1,float16,float16,0,0.025303998589515687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,2,128,1,float16,fp8,0,0.024036799371242524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,4,128,1,float16,float16,0,0.025791999697685242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,2,128,1,fp8,fp8,0,0.024027200043201448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,4,128,1,float16,fp8,0,0.02446880042552948
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,8,4,128,1,fp8,fp8,0,0.024476799368858337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,8,128,1,float16,float16,0,0.02481919974088669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,8,128,1,float16,fp8,0,0.02452480047941208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,8,128,1,fp8,fp8,0,0.024582399427890776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,1,128,1,float16,float16,0,0.023647999763488768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,1,128,1,float16,fp8,0,0.023705600202083586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,1,128,1,fp8,fp8,0,0.02380640059709549
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,2,128,1,float16,float16,0,0.02364639937877655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,2,128,1,float16,fp8,0,0.02377759963274002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,2,128,1,fp8,fp8,0,0.024035200476646423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,4,128,1,float16,float16,0,0.023963199555873872
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,4,128,1,float16,fp8,0,0.024031999707221984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,8,4,128,1,fp8,fp8,0,0.02422720044851303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,8,1,128,1,float16,float16,0,0.5017136096954345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,8,1,128,1,float16,fp8,0,0.48442559242248534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,8,1,128,1,fp8,fp8,0,0.4846047878265381
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,2,128,1,fp8,fp8,0,0.057687997817993164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,8,2,128,1,fp8,fp8,0,0.3781696081161499
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,8,4,128,1,fp8,fp8,0,0.060844802856445314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,8,2,128,1,fp8,fp8,0,0.48611998558044434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,8,8,128,1,float16,fp8,0,0.037854400277137754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,8,4,128,1,float16,float16,0,0.5843120098114014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,8,128,1,float16,float16,0,0.36464319229125974
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,8,4,128,1,float16,fp8,0,0.5323599815368653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,8,128,1,float16,fp8,0,0.3198368072509766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,8,4,128,1,fp8,fp8,0,0.5320576190948486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,8,128,1,fp8,fp8,0,0.319648003578186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,1,128,1,float16,float16,0,0.25684640407562254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,1,128,1,float16,fp8,0,0.24922080039978028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,1,128,1,fp8,fp8,0,0.24978239536285402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,2,128,1,float16,float16,0,0.26702558994293213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,2,128,1,float16,fp8,0,0.2500783920288086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,2,128,1,fp8,fp8,0,0.24987199306488037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,4,128,1,float16,float16,0,0.29989919662475584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,4,128,1,float16,fp8,0,0.273963189125061
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,8,128,1,float16,float16,0,0.1895519971847534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,8,128,1,float16,fp8,0,0.166703999042511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,8,128,1,fp8,fp8,0,0.16680320501327514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,1,128,1,float16,float16,0,0.13613760471343994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,1,128,1,float16,fp8,0,0.13239840269088746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,1,128,1,fp8,fp8,0,0.13146560192108153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,2,128,1,float16,fp8,0,0.13220319747924805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,2,128,1,fp8,fp8,0,0.13173919916152954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,4,128,1,float16,float16,0,0.15769439935684204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,4,128,1,float16,fp8,0,0.14358240365982056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,8,128,1,float16,float16,0,0.10251679420471191
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,4,128,1,fp8,fp8,0,0.14364960193634033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,8,128,1,float16,fp8,0,0.09044479727745056
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,8,128,1,fp8,fp8,0,0.09054080247879029
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,1,128,1,float16,float16,0,0.07514560222625732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,1,128,1,float16,fp8,0,0.07207679748535156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,1,128,1,fp8,fp8,0,0.07189120054244995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,2,128,1,float16,fp8,0,0.0724943995475769
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,2,128,1,fp8,fp8,0,0.07227839827537537
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,4,128,1,float16,float16,0,0.08620960116386414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,4,128,1,float16,fp8,0,0.07876160144805908
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,4,128,1,fp8,fp8,0,0.0790719985961914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,8,128,1,float16,float16,0,0.05860000252723694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,8,128,1,float16,fp8,0,0.052072000503540036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,8,128,1,fp8,fp8,0,0.051801598072052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,1,128,1,float16,float16,0,0.04203679859638214
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,1,128,1,float16,fp8,0,0.04015679955482483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,1,128,1,fp8,fp8,0,0.040510401129722595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,8,2,128,1,float16,fp8,0,0.4855775833129883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,2,128,1,float16,fp8,0,0.04074240028858185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,2,128,1,fp8,fp8,0,0.040468800067901614
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,4,128,1,float16,fp8,0,0.04311360120773315
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,4,128,1,fp8,fp8,0,0.04325439929962158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,8,128,1,float16,float16,0,0.03540160059928894
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,8,128,1,float16,fp8,0,0.029254400730133058
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,8,128,1,fp8,fp8,0,0.029265600442886352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,1,128,1,float16,float16,0,0.026502400636672974
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,1,128,1,float16,fp8,0,0.02606239914894104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,1,128,1,fp8,fp8,0,0.0261135995388031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,2,128,1,float16,float16,0,0.026710399985313417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,2,128,1,float16,fp8,0,0.02622080147266388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,2,128,1,fp8,fp8,0,0.026212799549102783
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,4,128,1,float16,float16,0,0.02797279953956604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,8,4,128,1,fp8,fp8,0,0.27331359386444093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,4,128,1,float16,fp8,0,0.026555201411247252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,8,4,128,1,fp8,fp8,0,0.0267551988363266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,8,2,128,1,float16,float16,0,0.14067200422286988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,8,128,1,float16,fp8,0,0.018755200505256652
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,8,128,1,fp8,fp8,0,0.018798400461673737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,1,128,1,float16,fp8,0,0.017550399899482726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,1,128,1,fp8,fp8,0,0.017825600504875184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,2,128,1,float16,float16,0,0.01757279932498932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,2,128,1,float16,fp8,0,0.017560000717639922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,8,2,128,1,float16,float16,0,0.07773119807243348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,2,128,1,fp8,fp8,0,0.0176256000995636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,4,128,1,float16,fp8,0,0.017985600233078002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,4,128,1,float16,float16,0,0.018086400628089905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,4,128,1,fp8,fp8,0,0.018147200345993042
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,8,128,1,float16,float16,0,0.01797440052032471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,8,128,1,float16,fp8,0,0.018172800540924072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,8,128,1,fp8,fp8,0,0.01798879951238632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,1,128,1,float16,float16,0,0.01682559996843338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,1,128,1,float16,fp8,0,0.017254400253295898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,1,128,1,fp8,fp8,0,0.017374399304389953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,2,128,1,float16,float16,0,0.01701280027627945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,8,2,128,1,float16,float16,0,0.5226143836975098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,2,128,1,fp8,fp8,0,0.01717599928379059
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,4,128,1,float16,float16,0,0.017262400686740877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,4,128,1,float16,float16,0,0.04987359941005707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,4,128,1,float16,fp8,0,0.017446400225162507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,8,128,1,float16,float16,0,0.017444799840450286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,8,128,1,float16,fp8,0,0.01764640063047409
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,8,128,1,fp8,fp8,0,0.017785599827766417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,1,128,1,float16,float16,0,0.016646400094032288
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,1,128,1,float16,fp8,0,0.016896000504493712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,1,128,1,fp8,fp8,0,0.016804799437522888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,2,128,1,float16,float16,0,0.016734400391578676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,2,128,1,float16,fp8,0,0.016734400391578676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,2,128,1,fp8,fp8,0,0.016940799355506898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,4,128,1,float16,float16,0,0.016628800332546233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,4,128,1,float16,fp8,0,0.01701440066099167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,8,4,128,1,fp8,fp8,0,0.016812799870967864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,8,1,128,1,float16,float16,0,0.21191039085388183
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,8,1,128,1,fp8,fp8,0,0.21518559455871583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,8,2,128,1,float16,float16,0,0.2228895902633667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,8,2,128,1,float16,fp8,0,0.21592159271240235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,8,128,1,float16,float16,0,0.019334399700164796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,8,2,128,1,fp8,fp8,0,0.21559679508209229
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,8,1,128,1,float16,float16,0,0.01730239987373352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,8,4,128,1,float16,float16,0,0.25632638931274415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,8,4,128,1,fp8,fp8,0,0.23895039558410644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,8,4,128,1,float16,fp8,0,0.23866560459136962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,8,128,1,float16,fp8,0,0.1434607982635498
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,1,128,1,float16,float16,0,0.11292799711227416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,8,128,1,fp8,fp8,0,0.1434880018234253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,1,128,1,fp8,fp8,0,0.11321760416030884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,2,128,1,float16,float16,0,0.11829440593719483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,2,128,1,fp8,fp8,0,0.1144752025604248
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,8,2,128,1,float16,float16,0,0.045228800177574156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,4,128,1,float16,float16,0,0.13537280559539794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,4,128,1,float16,fp8,0,0.12559360265731812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,4,128,1,fp8,fp8,0,0.017262400686740877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,8,128,1,float16,float16,0,0.09289439916610717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,8,128,1,float16,fp8,0,0.0782368004322052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,8,128,1,fp8,fp8,0,0.07881600260734559
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,1,128,1,float16,float16,0,0.0635919988155365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,1,128,1,float16,fp8,0,0.06308479905128479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,1,128,1,fp8,fp8,0,0.06288319826126099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,2,128,1,float16,float16,0,0.06635199785232544
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,2,128,1,float16,fp8,0,0.06343039870262146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,2,128,1,fp8,fp8,0,0.06334400177001953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,8,1,128,1,float16,fp8,0,0.21536800861358643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,4,128,1,float16,float16,0,0.07527199983596802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,4,128,1,float16,fp8,0,0.0696943998336792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,8,128,1,float16,float16,0,0.052983999252319336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,8,4,128,1,fp8,fp8,0,0.06984480023384095
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,8,128,1,float16,fp8,0,0.044531199336051944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,8,128,1,fp8,fp8,0,0.04457440078258514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,1,128,1,float16,float16,0,0.03557440042495728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,1,128,1,float16,fp8,0,0.034734401106834414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,1,128,1,fp8,fp8,0,0.034980800747871396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,2,128,1,float16,float16,0,0.03865920007228851
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,2,128,1,fp8,fp8,0,0.03493280112743378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,4,128,1,float16,float16,0,0.04335519969463349
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,4,128,1,float16,fp8,0,0.03779360055923462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,4,128,1,fp8,fp8,0,0.03784320056438446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,8,128,1,float16,float16,0,0.03054719865322113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,8,128,1,float16,float16,0,0.16917279958724976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,8,128,1,float16,fp8,0,0.023787200450897217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,1,128,1,float16,fp8,0,0.11415040493011475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,8,128,1,fp8,fp8,0,0.02388000041246414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,1,128,1,float16,fp8,0,0.022115199267864226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,1,128,1,fp8,fp8,0,0.021993599832057953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,2,128,1,float16,float16,0,0.02120320051908493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,2,128,1,fp8,fp8,0,0.021932800114154816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,4,128,1,float16,float16,0,0.02292959988117218
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,8,2,128,1,float16,fp8,0,0.017334400117397307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,4,128,1,fp8,fp8,0,0.02220800071954727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,4,128,1,fp8,fp8,0,0.12610559463500975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,8,128,1,float16,float16,0,0.01650879979133606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,8,128,1,float16,fp8,0,0.015859200060367583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,1,128,1,float16,float16,0,0.014414399862289429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,1,128,1,float16,fp8,0,0.01483519971370697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,1,128,1,fp8,fp8,0,0.014777599275112152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,2,128,1,float16,float16,0,0.014659200608730317
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,2,128,1,float16,fp8,0,0.014686399698257446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,2,128,1,fp8,fp8,0,0.014691199362277984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,4,128,1,float16,float16,0,0.01534239947795868
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,4,128,1,float16,fp8,0,0.015201599895954132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,4,128,1,fp8,fp8,0,0.01526239961385727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,8,128,1,float16,float16,0,0.015182399749755859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,8,128,1,float16,fp8,0,0.015143999457359314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,8,128,1,fp8,fp8,0,0.015057599544525147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,8,2,128,1,float16,fp8,0,0.034857600927352905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,1,128,1,float16,float16,0,0.013977600634098053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,1,128,1,float16,fp8,0,0.01408960074186325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,1,128,1,fp8,fp8,0,0.01409280002117157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,2,128,1,float16,float16,0,0.013808000087738036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,2,128,1,float16,fp8,0,0.014241600036621093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,2,128,1,fp8,fp8,0,0.014273600280284881
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,4,128,1,float16,float16,0,0.014379200339317322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,4,128,1,float16,fp8,0,0.014655999839305878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,8,4,128,1,fp8,fp8,0,0.014535999298095703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,8,128,1,float16,float16,0,0.014412799477577209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,8,128,1,float16,fp8,0,0.014644800126552582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,8,128,1,fp8,fp8,0,0.014691199362277984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,1,128,1,float16,float16,0,0.013742400705814362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,1,128,1,float16,float16,0,0.02096319943666458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,1,128,1,float16,fp8,0,0.014169600605964661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,1,128,1,fp8,fp8,0,0.013984000682830811
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,2,128,1,float16,fp8,0,0.021902400255203246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,2,128,1,float16,float16,0,0.013684800267219544
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,2,128,1,float16,fp8,0,0.014059199392795563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,8,2,128,1,float16,fp8,0,0.11360640525817871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,4,128,1,float16,fp8,0,0.014076800644397735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,4,128,1,fp8,fp8,0,0.014433600008487701
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,8,4,128,1,float16,fp8,0,0.02235199958086014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,8,128,1,float16,fp8,0,0.013875199854373932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,8,128,1,fp8,fp8,0,0.01417279988527298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,1,128,1,float16,float16,0,0.013148799538612366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,1,128,1,fp8,fp8,0,0.013542400300502777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,2,128,1,float16,float16,0,0.013193599879741669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,2,128,1,float16,fp8,0,0.013681599497795105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,2,128,1,fp8,fp8,0,0.01371839940547943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,4,128,1,float16,float16,0,0.013382400572299957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,4,128,1,fp8,fp8,0,0.013840000331401824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,8,1,128,1,float16,float16,0,0.10612319707870484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,8,1,128,1,float16,fp8,0,0.11119519472122193
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,8,1,128,1,fp8,fp8,0,0.11070879697799682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,8,2,128,1,float16,float16,0,0.11232000589370728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,8,2,128,1,float16,fp8,0,0.11207840442657471
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,8,2,128,1,fp8,fp8,0,0.11239839792251587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,8,4,128,1,float16,float16,0,0.1314687967300415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,8,4,128,1,float16,fp8,0,0.12266080379486084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,8,4,128,1,fp8,fp8,0,0.12275680303573608
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,8,128,1,float16,float16,0,0.09235519766807557
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,8,128,1,float16,fp8,0,0.07615680098533631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,8,128,1,fp8,fp8,0,0.07634400129318238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,1,128,1,float16,float16,0,0.06059200167655945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,1,128,1,float16,fp8,0,0.06038560271263123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,1,128,1,fp8,fp8,0,0.060247999429702756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,2,128,1,float16,float16,0,0.06352959871292115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,2,128,1,float16,fp8,0,0.060977602005004884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,2,128,1,fp8,fp8,0,0.06101920008659363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,4,128,1,float16,float16,0,0.07222719788551331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,2,128,1,fp8,fp8,0,0.013979199528694152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,4,128,1,float16,fp8,0,0.06511840224266052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,8,4,128,1,fp8,fp8,0,0.06545760035514832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,8,128,1,float16,fp8,0,0.040550398826599124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,8,128,1,fp8,fp8,0,0.04093759953975677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,8,4,128,1,float16,float16,0,0.014030399918556213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,1,128,1,float16,float16,0,0.03172479867935181
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,1,128,1,float16,fp8,0,0.030913600325584413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,8,128,1,float16,float16,0,0.013411200046539307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,1,128,1,fp8,fp8,0,0.030820798873901368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,2,128,1,float16,fp8,0,0.031272000074386595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,4,128,1,float16,float16,0,0.04087679982185364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,4,128,1,float16,fp8,0,0.01408800035715103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,4,128,1,fp8,fp8,0,0.033537599444389346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,8,128,1,float16,float16,0,0.02839680016040802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,8,128,1,float16,fp8,0,0.020367999374866486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,8,128,1,fp8,fp8,0,0.020364800095558168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,1,128,1,float16,float16,0,0.018606400489807128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,1,128,1,float16,fp8,0,0.019172799587249757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,2,128,1,float16,float16,0,0.018783999979496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,8,128,1,float16,float16,0,0.05066879987716675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,2,128,1,float16,fp8,0,0.019067199528217317
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,2,128,1,float16,float16,0,0.03597440123558045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,8,1,128,1,float16,fp8,0,0.013471999764442444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,4,128,1,float16,float16,0,0.020275199413299562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,4,128,1,float16,fp8,0,0.019366399943828584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,4,128,1,fp8,fp8,0,0.01932159960269928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,8,8,128,1,fp8,fp8,0,0.015836800634860992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,8,128,1,float16,float16,0,0.015031999349594117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,8,128,1,fp8,fp8,0,0.014020800590515137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,1,128,1,float16,float16,0,0.013075199723243714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,1,128,1,float16,fp8,0,0.013337600231170654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,1,128,1,fp8,fp8,0,0.013583999872207642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,2,128,1,float16,float16,0,0.013185599446296692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,2,128,1,float16,fp8,0,0.013265599310398103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,2,128,1,fp8,fp8,0,0.013449600338935852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,1,128,1,fp8,fp8,0,0.019116799533367156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,4,128,1,float16,float16,0,0.013875199854373932
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,4,128,1,float16,fp8,0,0.01353919953107834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,4,128,1,fp8,fp8,0,0.013465599715709686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,8,128,1,float16,fp8,0,0.013262400031089782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,8,128,1,fp8,fp8,0,0.012948800623416901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,1,128,1,float16,fp8,0,0.012862400710582733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,1,128,1,fp8,fp8,0,0.012654399871826172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,2,128,1,float16,float16,0,0.012656000256538392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,2,128,1,float16,fp8,0,0.012857599556446076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,2,128,1,fp8,fp8,0,0.012700800597667695
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,4,128,1,float16,float16,0,0.01292479932308197
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,4,128,1,float16,fp8,0,0.012683199346065521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,4,128,1,fp8,fp8,0,0.012705600261688233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,8,128,1,float16,float16,0,0.012833599746227265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,8,128,1,float16,fp8,0,0.012774400413036346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,8,128,1,fp8,fp8,0,0.012715199589729309
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,1,128,1,float16,float16,0,0.012088000029325485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,1,128,1,float16,fp8,0,0.012415999919176102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,1,128,1,fp8,fp8,0,0.012392000108957291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,2,128,1,float16,float16,0,0.012169600278139115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,2,128,1,float16,fp8,0,0.012649600207805634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,2,128,1,fp8,fp8,0,0.012862400710582733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,4,128,1,float16,float16,0,0.012532800436019897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,4,128,1,float16,fp8,0,0.012627199292182922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,8,4,128,1,fp8,fp8,0,0.012796799838542938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,8,2,128,1,fp8,fp8,0,0.0192671999335289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,8,128,1,float16,float16,0,0.012177599966526032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,8,128,1,fp8,fp8,0,0.012206400185823441
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,1,128,1,float16,float16,0,0.011913599818944931
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,8,8,128,1,float16,fp8,0,0.014169600605964661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,1,128,1,fp8,fp8,0,0.011928000301122666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,2,128,1,float16,float16,0,0.011798399686813354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,2,128,1,float16,fp8,0,0.011987199634313583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,2,128,1,fp8,fp8,0,0.012012799829244613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,4,128,1,float16,float16,0,0.012035199999809265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,8,128,1,float16,float16,0,0.013500800728797913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,4,128,1,float16,fp8,0,0.01197120025753975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,8,1,128,1,float16,float16,0,0.01252640038728714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,8,128,1,float16,fp8,0,0.011952000111341477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,8,128,1,fp8,fp8,0,0.011816000193357467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,1,128,1,float16,float16,0,0.011550399661064147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,1,128,1,float16,fp8,0,0.011803200095891952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,1,128,1,fp8,fp8,0,0.011982399970293045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,2,128,1,float16,float16,0,0.011716800183057785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,2,128,1,float16,fp8,0,0.011847999691963196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,2,128,1,fp8,fp8,0,0.01175519973039627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,4,128,1,float16,float16,0,0.011497599631547928
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,4,128,1,float16,fp8,0,0.011817599833011627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,4,128,1,fp8,fp8,0,0.011849600076675414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,8,1,128,1,float16,float16,0,0.061110401153564455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,8,1,128,1,float16,fp8,0,0.061622399091720584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,8,1,128,1,fp8,fp8,0,0.061536002159118655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,8,2,128,1,float16,float16,0,0.06487680077552796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,8,2,128,1,float16,fp8,0,0.0622111976146698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,8,2,128,1,fp8,fp8,0,0.06226080060005188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,8,4,128,1,float16,float16,0,0.0739184021949768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,8,4,128,1,float16,fp8,0,0.06656320095062256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,8,4,128,1,fp8,fp8,0,0.06670719981193543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,8,128,1,float16,float16,0,0.0584384024143219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,8,128,1,float16,fp8,0,0.044886401295661925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,8,128,1,fp8,fp8,0,0.04476960003376007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,1,128,1,float16,float16,0,0.03131360113620758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,8,128,1,float16,fp8,0,0.012291199713945388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,1,128,1,float16,fp8,0,0.0315200001001358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,1,128,1,fp8,fp8,0,0.031462401151657104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,1,128,1,float16,fp8,0,0.012174399942159653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,2,128,1,float16,fp8,0,0.03192960023880005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,4,128,1,float16,float16,0,0.04149760007858276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,4,128,1,float16,fp8,0,0.033287999033927915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,4,128,1,fp8,fp8,0,0.03365919888019562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,8,128,1,float16,float16,0,0.03101919889450073
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,8,128,1,float16,fp8,0,0.02253919988870621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,8,128,1,fp8,fp8,0,0.02253279983997345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,1,128,1,float16,float16,0,0.018918399512767792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,8,4,128,1,fp8,fp8,0,0.012012799829244613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,1,128,1,fp8,fp8,0,0.019596800208091736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,2,128,1,float16,float16,0,0.018838399648666383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,2,128,1,float16,fp8,0,0.01955360025167465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,2,128,1,fp8,fp8,0,0.019598400592803954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,4,128,1,float16,fp8,0,0.019734400510787963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,4,128,1,fp8,fp8,0,0.019752000272274018
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,8,128,1,float16,float16,0,0.01658399999141693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,8,128,1,float16,fp8,0,0.014907200634479523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,2,128,1,fp8,fp8,0,0.03136320114135742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,2,128,1,float16,float16,0,0.036262398958206175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,8,2,128,1,fp8,fp8,0,0.03163999915122986
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,8,128,1,fp8,fp8,0,0.014801600575447082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,1,128,1,float16,float16,0,0.013447999954223633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,1,128,1,float16,fp8,0,0.013764800131320953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,2,128,1,float16,float16,0,0.013670399785041809
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,2,128,1,float16,fp8,0,0.013756799697875976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,2,128,1,fp8,fp8,0,0.013884800672531127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,4,128,1,float16,float16,0,0.014214399456977844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,4,128,1,float16,fp8,0,0.01404000073671341
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,4,128,1,fp8,fp8,0,0.01403840035200119
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,8,128,1,float16,float16,0,0.01202400028705597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,8,128,1,float16,fp8,0,0.011168000102043153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,8,128,1,fp8,fp8,0,0.011307200044393539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,1,128,1,float16,float16,0,0.012352000176906585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,1,128,1,float16,fp8,0,0.012904000282287598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,1,128,1,fp8,fp8,0,0.012596799433231354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,4,128,1,float16,float16,0,0.020027199387550355
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,2,128,1,float16,fp8,0,0.012649600207805634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,2,128,1,fp8,fp8,0,0.012787200510501862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,4,128,1,float16,fp8,0,0.012814399600028992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,4,128,1,fp8,fp8,0,0.012839999794960023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,8,128,1,float16,float16,0,0.011159999668598175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,8,128,1,float16,fp8,0,0.010652799904346467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,8,128,1,fp8,fp8,0,0.010649599879980088
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,1,128,1,float16,float16,0,0.012409599870443344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,1,128,1,float16,fp8,0,0.012590399384498597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,1,128,1,fp8,fp8,0,0.012731200456619263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,2,128,1,float16,float16,0,0.01231039986014366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,2,128,1,float16,fp8,0,0.012630400061607362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,2,128,1,fp8,fp8,0,0.012718400359153748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,4,128,1,float16,float16,0,0.012468799948692322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,4,128,1,float16,fp8,0,0.013048000633716583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,8,4,128,1,fp8,fp8,0,0.01281760036945343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,8,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,8,128,1,float16,fp8,0,0.010259199887514114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,8,4,128,1,float16,fp8,0,0.033888000249862674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,8,128,1,fp8,fp8,0,0.010152000188827514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,1,128,1,float16,float16,0,0.011868800222873687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,1,128,1,fp8,fp8,0,0.01218239963054657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,8,1,128,1,fp8,fp8,0,0.013953599333763122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,2,128,1,float16,float16,0,0.011896000057458878
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,2,128,1,fp8,fp8,0,0.011992000043392181
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,4,128,1,float16,float16,0,0.011964800208806992
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,4,128,1,float16,fp8,0,0.012321600317955017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,4,128,1,fp8,fp8,0,0.012080000340938568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,8,1,128,1,float16,fp8,0,0.019630399346351624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,8,128,1,float16,float16,0,0.009950400143861771
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,8,128,1,fp8,fp8,0,0.009627199918031692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,1,128,1,float16,float16,0,0.01165279969573021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,1,128,1,float16,fp8,0,0.011979199945926666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,1,128,1,fp8,fp8,0,0.012083200365304947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,4,128,1,float16,float16,0,0.01263359934091568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,2,128,1,fp8,fp8,0,0.011766400188207626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,4,128,1,float16,float16,0,0.01146719977259636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,4,128,1,float16,fp8,0,0.011803200095891952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,4,128,1,fp8,fp8,0,0.0117807999253273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,8,128,1,float16,float16,0,0.009907200187444686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,8,128,1,float16,fp8,0,0.009724800288677216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,8,128,1,fp8,fp8,0,0.009640000015497207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,1,128,1,float16,float16,0,0.011587200313806533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,1,128,1,float16,fp8,0,0.011870399862527848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,1,128,1,fp8,fp8,0,0.011774399876594543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,2,128,1,float16,float16,0,0.011343999952077865
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,2,128,1,float16,fp8,0,0.011648000031709672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,2,128,1,fp8,fp8,0,0.011649599671363831
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,4,128,1,float16,float16,0,0.011289600282907486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,4,128,1,float16,fp8,0,0.011635199934244157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,8,4,128,1,fp8,fp8,0,0.011819200217723846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,8,1,128,1,float16,float16,0,0.033374398946762085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,8,1,128,1,float16,fp8,0,0.034016001224517825
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,8,1,128,1,fp8,fp8,0,0.033852800726890564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,2,128,1,float16,fp8,0,0.012356799840927125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,8,2,128,1,float16,float16,0,0.03727520108222961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,8,2,128,1,float16,fp8,0,0.03437120020389557
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,8,2,128,1,fp8,fp8,0,0.034115201234817503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,8,8,128,1,float16,float16,0,0.011790399998426437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,8,4,128,1,float16,float16,0,0.04795039892196655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,8,4,128,1,float16,fp8,0,0.041119998693466185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,8,128,1,float16,fp8,0,0.009616000205278396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,8,128,1,float16,float16,0,0.04217920005321503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,8,4,128,1,fp8,fp8,0,0.03934240043163299
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,1,128,1,float16,float16,0,0.0195360004901886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,8,2,128,1,float16,float16,0,0.01284320056438446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,1,128,1,float16,fp8,0,0.020695999264717102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,1,128,1,fp8,fp8,0,0.020582400262355804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,2,128,1,float16,fp8,0,0.01188960000872612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,2,128,1,float16,fp8,0,0.02083680033683777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,4,128,1,float16,float16,0,0.02442079931497574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,4,128,1,float16,fp8,0,0.022475199401378633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,4,128,1,fp8,fp8,0,0.022595199942588805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,8,128,1,float16,float16,0,0.022228799760341644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,8,128,1,float16,fp8,0,0.02074880003929138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,8,128,1,fp8,fp8,0,0.0208079993724823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,1,128,1,float16,float16,0,0.01385599970817566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,1,128,1,float16,fp8,0,0.014212800562381745
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,1,128,1,fp8,fp8,0,0.014233599603176116
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,2,128,1,float16,float16,0,0.013900800049304963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,2,128,1,float16,fp8,0,0.014392000436782838
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,8,1,128,1,float16,fp8,0,0.012067200243473053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,4,128,1,float16,float16,0,0.015748800337314607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,4,128,1,float16,fp8,0,0.014932799339294433
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,4,128,1,fp8,fp8,0,0.01493919938802719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,8,128,1,float16,float16,0,0.014468799531459808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,8,128,1,float16,fp8,0,0.013769599795341491
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,8,128,1,float16,fp8,0,0.03462400138378143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,8,128,1,fp8,fp8,0,0.03463039994239807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,1,128,1,float16,float16,0,0.012904000282287598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,8,2,128,1,float16,float16,0,0.011670400202274323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,1,128,1,fp8,fp8,0,0.013153600692749023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,2,128,1,float16,float16,0,0.012886400520801543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,2,128,1,fp8,fp8,0,0.02048480063676834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,2,128,1,float16,fp8,0,0.013019199669361114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,2,128,1,fp8,fp8,0,0.013193599879741669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,4,128,1,float16,float16,0,0.011403200030326844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,4,128,1,float16,fp8,0,0.011155200004577637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,4,128,1,fp8,fp8,0,0.011057599633932113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,8,128,1,float16,float16,0,0.011204800009727478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,8,128,1,fp8,fp8,0,0.010815999656915664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,1,128,1,float16,float16,0,0.012619200348854064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,1,128,1,float16,fp8,0,0.012756800651550293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,1,128,1,fp8,fp8,0,0.012755200266838074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,2,128,1,float16,float16,0,0.0124719999730587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,2,128,1,fp8,fp8,0,0.012732799351215362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,8,2,128,1,fp8,fp8,0,0.014127999544143677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,4,128,1,float16,fp8,0,0.010332799702882766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,4,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,8,128,1,float16,float16,0,0.01064639985561371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,8,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,8,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,1,128,1,float16,float16,0,0.01223680004477501
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,1,128,1,float16,fp8,0,0.012089599668979645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,1,128,1,fp8,fp8,0,0.012377600371837615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,2,128,1,float16,float16,0,0.012120000272989272
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,2,128,1,float16,fp8,0,0.012179200351238251
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,2,128,1,fp8,fp8,0,0.012135999649763108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,4,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,4,128,1,float16,fp8,0,0.009889599680900574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,8,4,128,1,fp8,fp8,0,0.010196799784898758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,8,128,1,float16,float16,0,0.010016000270843506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,8,128,1,float16,fp8,0,0.010063999891281128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,8,128,1,fp8,fp8,0,0.00973920002579689
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,1,128,1,float16,float16,0,0.011720000207424164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,1,128,1,float16,fp8,0,0.01191840022802353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,1,128,1,fp8,fp8,0,0.01188960000872612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,2,128,1,float16,float16,0,0.011603199690580369
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,2,128,1,float16,fp8,0,0.011846400052309036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,2,128,1,fp8,fp8,0,0.011963199824094772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,4,128,1,float16,float16,0,0.009857600182294845
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,4,128,1,float16,fp8,0,0.009457600116729737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,8,4,128,1,fp8,fp8,0,0.009657599776983262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,8,128,1,float16,float16,0,0.00963359996676445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,8,128,1,fp8,fp8,0,0.013916799426078796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,8,128,1,fp8,fp8,0,0.009567999839782714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,1,128,1,float16,float16,0,0.011532799899578094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,1,128,1,float16,fp8,0,0.01189119964838028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,1,128,1,fp8,fp8,0,0.01186399981379509
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,2,128,1,float16,float16,0,0.011555200070142746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,2,128,1,float16,fp8,0,0.011816000193357467
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,8,2,128,1,float16,float16,0,0.019707199931144715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,2,128,1,fp8,fp8,0,0.01178240031003952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,4,128,1,float16,float16,0,0.00987040027976036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,4,128,1,fp8,fp8,0,0.009531199932098389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,4,128,1,float16,fp8,0,0.009643200039863586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,8,128,1,float16,fp8,0,0.009415999799966813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,8,128,1,float16,fp8,0,0.010847999900579452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,1,128,1,float16,fp8,0,0.01173119992017746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,2,128,1,float16,fp8,0,0.012667199969291687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,2,128,1,float16,float16,0,0.01136159971356392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,8,4,128,1,float16,float16,0,0.010836800187826156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,2,128,1,float16,fp8,0,0.011832000315189361
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,2,128,1,fp8,fp8,0,0.011897599697113037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,4,128,1,float16,float16,0,0.009748800098896027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,4,128,1,float16,fp8,0,0.009590400010347366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,4,128,1,fp8,fp8,0,0.009556800127029419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,8,1,128,1,float16,float16,0,0.022222399711608887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,8,1,128,1,float16,fp8,0,0.02298080027103424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,8,1,128,1,fp8,fp8,0,0.023311999440193177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,8,2,128,1,float16,float16,0,0.024979199469089507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,8,2,128,1,float16,fp8,0,0.024833600223064422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,8,2,128,1,fp8,fp8,0,0.02455040067434311
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,8,4,128,1,float16,float16,0,0.03747040033340454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,8,4,128,1,float16,fp8,0,0.035795199871063235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,8,4,128,1,fp8,fp8,0,0.03601920008659363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,8,128,1,float16,float16,0,0.034729599952697754
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,8,128,1,float16,fp8,0,0.03335680067539215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,8,128,1,fp8,fp8,0,0.03298400044441223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,1,128,1,float16,float16,0,0.015148800611495972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,1,128,1,float16,fp8,0,0.015695999562740325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,1,128,1,fp8,fp8,0,0.015411199629306793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,2,128,1,float16,float16,0,0.01627359986305237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,2,128,1,float16,fp8,0,0.01608320027589798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,2,128,1,fp8,fp8,0,0.0160848006606102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,4,128,1,float16,fp8,0,0.021807999908924104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,4,128,1,fp8,fp8,0,0.02182080000638962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,8,128,1,float16,float16,0,0.020953600108623505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,8,128,1,float16,fp8,0,0.020084799826145174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,8,128,1,fp8,fp8,0,0.019971199333667755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,8,8,128,1,float16,fp8,0,0.009595199674367904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,1,128,1,float16,float16,0,0.01321599930524826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,1,128,1,float16,fp8,0,0.01401599943637848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,8,1,128,1,float16,fp8,0,0.013060800731182098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,2,128,1,float16,float16,0,0.011849600076675414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,2,128,1,float16,fp8,0,0.011550399661064147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,2,128,1,fp8,fp8,0,0.011503999680280685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,8,128,1,float16,float16,0,0.009798400104045868
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,4,128,1,float16,float16,0,0.014977599680423736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,4,128,1,float16,fp8,0,0.01443839967250824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,8,128,1,fp8,fp8,0,0.009649600088596343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,8,128,1,float16,float16,0,0.014315199851989747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,1,128,1,float16,float16,0,0.01138719990849495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,4,128,1,fp8,fp8,0,0.014300799369812012
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,8,128,1,fp8,fp8,0,0.013822400569915771
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,1,128,1,float16,float16,0,0.012676799297332763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,1,128,1,float16,fp8,0,0.013072000443935394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,1,128,1,fp8,fp8,0,0.013115200400352477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,2,128,1,float16,float16,0,0.011209599673748016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,2,128,1,float16,fp8,0,0.010920000076293946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,2,128,1,fp8,fp8,0,0.011104000359773636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,4,128,1,float16,float16,0,0.011184000223875046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,4,128,1,fp8,fp8,0,0.01106399968266487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,8,128,1,float16,float16,0,0.01072480008006096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,8,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,8,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,1,128,1,float16,float16,0,0.011921600252389909
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,1,128,1,float16,fp8,0,0.012371200323104858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,1,128,1,fp8,fp8,0,0.01223519966006279
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,2,128,1,float16,float16,0,0.010147199779748917
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,2,128,1,float16,fp8,0,0.010275200009346008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,2,128,1,fp8,fp8,0,0.009987200051546097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,4,128,1,float16,float16,0,0.010745599865913391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,4,128,1,float16,fp8,0,0.01035040020942688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,8,128,1,float16,float16,0,0.010179200023412705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,8,128,1,float16,fp8,0,0.01014079973101616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,8,128,1,fp8,fp8,0,0.009894400089979171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,1,128,1,float16,float16,0,0.011793600022792816
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,1,128,1,float16,fp8,0,0.011956799775362015
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,1,128,1,fp8,fp8,0,0.012107200175523757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,2,128,1,float16,float16,0,0.009984000027179718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,2,128,1,fp8,fp8,0,0.009876800328493118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,4,128,1,float16,float16,0,0.010304000228643417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,4,128,1,float16,fp8,0,0.010063999891281128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,4,128,1,fp8,fp8,0,0.010060799866914749
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,8,128,1,float16,float16,0,0.009808000177145004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,8,128,1,float16,fp8,0,0.009747199714183807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,8,128,1,fp8,fp8,0,0.009692800045013428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,1,128,1,float16,float16,0,0.011347199976444244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,1,128,1,float16,fp8,0,0.011695999652147293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,8,4,128,1,float16,float16,0,0.0226623997092247
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,2,128,1,float16,float16,0,0.009918399900197983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,2,128,1,float16,fp8,0,0.009627199918031692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,2,128,1,fp8,fp8,0,0.009647999703884125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,4,128,1,float16,float16,0,0.00984639972448349
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,4,128,1,float16,fp8,0,0.0094991996884346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,4,128,1,fp8,fp8,0,0.009510400146245957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,8,128,1,float16,float16,0,0.009703999757766724
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,8,1,128,1,fp8,fp8,0,0.013422399759292603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,8,128,1,float16,fp8,0,0.009455999732017517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,8,128,1,fp8,fp8,0,0.009657599776983262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,1,128,1,float16,float16,0,0.011462400108575821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,1,128,1,float16,fp8,0,0.011843200027942657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,1,128,1,fp8,fp8,0,0.011849600076675414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,2,128,1,float16,float16,0,0.009683199971914292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,2,128,1,float16,fp8,0,0.009548799693584442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,2,128,1,fp8,fp8,0,0.009391999989748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,4,128,1,float16,float16,0,0.009796799719333648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,4,128,1,float16,fp8,0,0.009561599791049957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,8,128,1,float16,fp8,0,0.01372160017490387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,8,4,128,1,fp8,fp8,0,0.009428799897432328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,8,128,1,float16,float16,0,0.009571199864149093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,8,128,1,float16,fp8,0,0.009433600306510925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,1,128,1,float16,float16,0,0.011243200302124024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,8,1,128,1,fp8,fp8,0,0.011710400134325028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,1,128,1,float16,fp8,0,0.011695999652147293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,1,128,1,fp8,fp8,0,0.011561600118875503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,2,128,1,float16,fp8,0,0.009521599858999252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,2,128,1,fp8,fp8,0,0.009491200000047684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,4,128,1,float16,float16,0,0.009747199714183807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,4,128,1,float16,fp8,0,0.00947680026292801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,4,128,1,fp8,fp8,0,0.009531199932098389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,8,1,128,1,float16,float16,0,0.018060800433158875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,8,4,128,1,fp8,fp8,0,0.010279999673366546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,8,1,128,1,fp8,fp8,0,0.01791519969701767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,8,2,128,1,float16,float16,0,0.02340800017118454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,8,2,128,1,fp8,fp8,0,0.023286400735378264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,8,4,128,1,float16,float16,0,0.034668800234794614
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,8,4,128,1,float16,fp8,0,0.034508800506591795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,8,4,128,1,fp8,fp8,0,0.034462401270866395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,8,128,1,float16,float16,0,0.032455998659133914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,8,128,1,float16,fp8,0,0.032118400931358336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,8,128,1,fp8,fp8,0,0.03210720121860504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,1,128,1,float16,float16,0,0.01316159963607788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,1,128,1,float16,fp8,0,0.012943999469280243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,1,128,1,fp8,fp8,0,0.012945599853992462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,8,1,128,1,fp8,fp8,0,0.01183359995484352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,2,128,1,float16,float16,0,0.015748800337314607
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,2,128,1,float16,fp8,0,0.015585599839687348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,4,128,1,float16,float16,0,0.021216000616550445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,4,128,1,float16,fp8,0,0.021155199408531188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,8,128,1,fp8,fp8,0,0.009759999811649323
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,8,4,128,1,float16,fp8,0,0.010995200276374817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,4,128,1,fp8,fp8,0,0.021137599647045136
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,8,128,1,float16,float16,0,0.019700799882411957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,8,128,1,float16,fp8,0,0.01955839991569519
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,8,128,1,fp8,fp8,0,0.019494399428367615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,1,128,1,float16,float16,0,0.011238399893045425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,8,1,128,1,float16,fp8,0,0.017948800325393678
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,1,128,1,float16,fp8,0,0.011385600268840789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,1,128,1,fp8,fp8,0,0.011151999980211259
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,2,128,1,float16,float16,0,0.01130559965968132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,8,2,128,1,float16,fp8,0,0.023558400571346283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,4,128,1,float16,float16,0,0.013996799290180207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,4,128,1,float16,fp8,0,0.013840000331401824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,4,128,1,fp8,fp8,0,0.013887999951839447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,8,128,1,float16,float16,0,0.013193599879741669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,8,128,1,float16,fp8,0,0.01321599930524826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,8,128,1,fp8,fp8,0,0.013091200590133667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,1,128,1,float16,fp8,0,0.010198400169610978
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,1,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,2,128,1,float16,float16,0,0.010633599758148194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,2,128,1,float16,fp8,0,0.010846400260925293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,2,128,1,fp8,fp8,0,0.010740800201892853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,4,128,1,float16,float16,0,0.010931199789047242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,4,128,1,float16,fp8,0,0.010897599905729295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,8,4,128,1,fp8,fp8,0,0.0108815997838974
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,8,128,1,float16,float16,0,0.010232000052928925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,8,128,1,float16,fp8,0,0.01029599979519844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,8,128,1,fp8,fp8,0,0.010047999769449234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,1,128,1,float16,float16,0,0.009876800328493118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,1,128,1,float16,fp8,0,0.009433600306510925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,1,128,1,fp8,fp8,0,0.009721600264310837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,2,128,1,float16,float16,0,0.009702400118112565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,2,128,1,float16,fp8,0,0.00995199978351593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,2,128,1,fp8,fp8,0,0.009787199646234512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,4,128,1,float16,float16,0,0.009990400075912476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,4,128,1,float16,fp8,0,0.010094399750232696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,8,4,128,1,fp8,fp8,0,0.010102400183677673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,8,128,1,float16,float16,0,0.01029760017991066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,8,128,1,float16,fp8,0,0.010332799702882766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,8,128,1,fp8,fp8,0,0.010291200131177902
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,1,128,1,float16,float16,0,0.009991999715566635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,1,128,1,float16,fp8,0,0.00974240005016327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,1,128,1,fp8,fp8,0,0.009662400186061858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,2,128,1,float16,float16,0,0.00958079993724823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,2,128,1,float16,fp8,0,0.009611199796199798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,2,128,1,fp8,fp8,0,0.009465599805116654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,4,128,1,float16,float16,0,0.009996800124645234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,4,128,1,fp8,fp8,0,0.009963200241327286
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,8,128,1,float16,float16,0,0.00979520007967949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,8,128,1,float16,fp8,0,0.009996800124645234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,8,128,1,fp8,fp8,0,0.01003040000796318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,1,128,1,float16,float16,0,0.00952640026807785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,1,128,1,float16,fp8,0,0.009428799897432328
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,1,128,1,fp8,fp8,0,0.009424000233411788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,2,128,1,float16,float16,0,0.009747199714183807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,8,2,128,1,float16,float16,0,0.00963039994239807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,2,128,1,float16,fp8,0,0.009435199946165086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,4,128,1,float16,float16,0,0.009569600224494934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,4,128,1,float16,fp8,0,0.009532800316810608
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,4,128,1,fp8,fp8,0,0.009540800005197525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,2,128,1,float16,fp8,0,0.011286400258541107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,8,2,128,1,fp8,fp8,0,0.011587200313806533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,8,128,1,float16,fp8,0,0.009524799883365631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,8,128,1,fp8,fp8,0,0.009654399752616883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,8,2,128,1,float16,fp8,0,0.009668800234794616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,1,128,1,float16,fp8,0,0.009515199810266495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,1,128,1,fp8,fp8,0,0.00931359976530075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,2,128,1,float16,float16,0,0.009489600360393525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,2,128,1,float16,fp8,0,0.009223999828100205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,4,128,1,float16,float16,0,0.009567999839782714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,4,128,1,float16,fp8,0,0.009431999921798707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,4,128,1,fp8,fp8,0,0.009398400038480758
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,8,128,1,float16,float16,0,0.009673599898815156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,8,128,1,float16,fp8,0,0.00942080020904541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,8,128,1,fp8,fp8,0,0.009404800087213516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,1,128,1,float16,float16,0,0.009524799883365631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,1,128,1,float16,fp8,0,0.009430400282144546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,1,128,1,fp8,fp8,0,0.00929120033979416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,2,128,1,float16,float16,0,0.009679999947547913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,2,128,1,float16,fp8,0,0.009372799843549728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,8,2,128,1,fp8,fp8,0,0.015248000621795654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,4,128,1,float16,float16,0,0.009647999703884125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,4,128,1,fp8,fp8,0,0.009411200135946273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,8,4,128,1,float16,fp8,0,0.009697599709033966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,8,2,128,1,fp8,fp8,0,0.009547200053930283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,8,128,1,float16,float16,0,0.009656000137329101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,1,128,1,float16,float16,0,0.009728000313043595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,8,2,128,1,fp8,fp8,0,0.009352000057697296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,4,1,128,1,float16,fp8,0,2.953222465515137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,2,128,1,fp8,fp8,0,0.009256000071763993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,8,4,128,1,float16,fp8,0,0.009433600306510925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,4,1,128,1,fp8,fp8,0,2.8917871475219727
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,4,2,128,1,float16,fp8,0,2.9266384124755858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,4,4,128,1,fp8,fp8,0,1.4869744300842285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,4,4,128,1,float16,float16,0,1.7766096115112304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,4,2,128,1,fp8,fp8,0,2.9027616500854494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,4,2,128,1,float16,float16,0,3.6044078826904298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,4,1,128,1,float16,float16,0,1.6788591384887694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,4,1,128,1,float16,fp8,0,1.5393744468688966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,4,2,128,1,float16,fp8,0,1.481599998474121
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,4,1,128,1,float16,float16,0,3.5171871185302734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,4,2,128,1,float16,float16,0,1.909841537475586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,4,2,128,1,fp8,fp8,0,1.6569311141967773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,4,4,128,1,float16,float16,0,0.8900320053100585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,4,4,128,1,float16,fp8,0,0.7565455913543702
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,4,4,128,1,fp8,fp8,0,0.7549344062805176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,4,1,128,1,float16,fp8,0,0.7792479991912842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,4,1,128,1,float16,float16,0,0.9509471893310547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,4,1,128,1,fp8,fp8,0,0.7876304149627685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,4,4,128,1,float16,fp8,0,1.4783072471618652
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,4,2,128,1,float16,fp8,0,0.7450736045837403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,4,4,128,1,float16,float16,0,0.4856832027435303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,4,4,128,1,float16,fp8,0,0.4029280185699463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,4,2,128,1,fp8,fp8,0,0.7546224117279052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,4,4,128,1,fp8,fp8,0,0.38189599514007566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,4,1,128,1,float16,float16,0,0.42914562225341796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,4,1,128,1,float16,fp8,0,0.41123361587524415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,4,1,128,1,fp8,fp8,0,0.396068811416626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,4,2,128,1,float16,float16,0,0.4372704029083252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,4,2,128,1,float16,fp8,0,0.3766063928604126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,4,2,128,1,fp8,fp8,0,0.3715791940689087
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,4,1,128,1,fp8,fp8,0,1.4649007797241211
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,4,1,128,1,float16,fp8,0,1.6870271682739257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,4,1,128,1,fp8,fp8,0,1.7067855834960937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,4,1,128,1,float16,float16,0,2.049928092956543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,4,2,128,1,float16,fp8,0,1.6631200790405274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,4,2,128,1,float16,float16,0,1.9879728317260743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,4,2,128,1,fp8,fp8,0,1.676041603088379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,4,2,128,1,float16,float16,0,0.854969596862793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,4,4,128,1,fp8,fp8,0,1.0204256057739258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,4,1,128,1,float16,float16,0,0.966096019744873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,4,1,128,1,float16,fp8,0,0.9845600128173828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,4,1,128,1,fp8,fp8,0,0.8482463836669922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,4,2,128,1,fp8,fp8,0,0.8414192199707031
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,4,2,128,1,float16,fp8,0,0.8958383560180664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,4,4,128,1,float16,fp8,0,0.45557122230529784
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,4,4,128,1,fp8,fp8,0,0.4553743839263916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,4,1,128,1,float16,float16,0,0.49692959785461427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,4,1,128,1,float16,fp8,0,0.42955517768859863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,4,1,128,1,fp8,fp8,0,0.42723841667175294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,4,2,128,1,float16,float16,0,0.512830400466919
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,4,2,128,1,fp8,fp8,0,0.43043041229248047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,4,4,128,1,float16,float16,0,0.3045120000839233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,4,4,128,1,float16,fp8,0,0.24085919857025145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,4,1,128,1,float16,float16,0,0.26735360622406007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,4,1,128,1,float16,fp8,0,0.23788321018218994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,4,1,128,1,fp8,fp8,0,0.23692960739135743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,4,2,128,1,float16,float16,0,0.2725791931152344
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,4,4,128,1,float16,fp8,0,0.8971088409423829
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,4,2,128,1,float16,fp8,0,0.2372175931930542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,4,4,128,1,float16,float16,0,1.068558406829834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,4,2,128,1,float16,float16,0,0.9916399955749512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,4,1,128,1,float16,fp8,0,1.1838175773620605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,4,1,128,1,fp8,fp8,0,1.1862863540649413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,4,2,128,1,float16,fp8,0,0.44142718315124513
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,4,2,128,1,float16,fp8,0,1.1877391815185547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,4,4,128,1,fp8,fp8,0,0.24086239337921142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,4,4,128,1,float16,float16,0,0.7631184101104737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,4,4,128,1,float16,fp8,0,0.6308047771453857
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,4,4,128,1,fp8,fp8,0,0.6356319904327392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,4,2,128,1,fp8,fp8,0,0.23851521015167237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,4,1,128,1,float16,float16,0,0.6966383934020997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,4,1,128,1,float16,fp8,0,0.6028192043304443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,4,1,128,1,fp8,fp8,0,0.6028031826019287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,4,2,128,1,float16,float16,0,0.7094816207885742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,4,4,128,1,float16,float16,0,0.600270414352417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,4,2,128,1,float16,fp8,0,0.6683135986328125
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,4,4,128,1,float16,float16,0,0.4095903873443604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,4,2,128,1,fp8,fp8,0,0.6049600124359131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,4,1,128,1,float16,float16,0,1.3694175720214843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,4,4,128,1,fp8,fp8,0,0.33042240142822266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,4,1,128,1,float16,fp8,0,0.31007680892944334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,4,1,128,1,fp8,fp8,0,0.31055679321289065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,4,2,128,1,float16,float16,0,0.3709295988082886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,4,2,128,1,float16,fp8,0,0.3128895998001099
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,4,2,128,1,fp8,fp8,0,0.31102240085601807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,4,4,128,1,float16,float16,0,0.2401360034942627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,4,4,128,1,float16,fp8,0,0.17195839881896974
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,4,4,128,1,fp8,fp8,0,0.17352160215377807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,4,1,128,1,float16,float16,0,0.193123197555542
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,4,2,128,1,float16,float16,0,0.19413440227508544
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,4,2,128,1,float16,float16,0,1.3917872428894043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,4,2,128,1,fp8,fp8,0,1.1906607627868653
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,4,2,128,1,float16,fp8,0,0.17117760181427003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,4,2,128,1,fp8,fp8,0,0.17211999893188476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,4,1,128,1,float16,fp8,0,1.5389344215393066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,4,1,128,1,float16,float16,0,1.769228744506836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,4,2,128,1,float16,float16,0,1.8392751693725586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,4,4,128,1,float16,fp8,0,0.32803199291229246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,4,1,128,1,float16,float16,0,0.36330080032348633
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,4,4,128,1,float16,float16,0,0.991267204284668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,4,4,128,1,float16,fp8,0,0.7975808143615722
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,4,1,128,1,float16,fp8,0,0.17121920585632325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,4,2,128,1,float16,fp8,0,1.5499391555786133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,4,4,128,1,fp8,fp8,0,0.7974847793579102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,4,2,128,1,fp8,fp8,0,1.639566421508789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,4,1,128,1,float16,fp8,0,0.7741439819335938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,4,1,128,1,float16,float16,0,0.9585552215576172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,4,1,128,1,fp8,fp8,0,0.7750592231750488
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,4,2,128,1,float16,float16,0,0.919222354888916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,4,2,128,1,float16,fp8,0,0.7779551982879639
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,4,2,128,1,fp8,fp8,0,0.7941472053527832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,4,4,128,1,float16,float16,0,0.4709199905395508
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,4,4,128,1,float16,fp8,0,0.40798239707946776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,4,4,128,1,fp8,fp8,0,0.46003360748291017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,4,1,128,1,float16,float16,0,0.4532336235046387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,4,1,128,1,float16,fp8,0,0.39615840911865235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,4,1,128,1,fp8,fp8,0,0.4102479934692383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,4,1,128,1,fp8,fp8,0,1.5482255935668945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,4,4,128,1,float16,float16,0,0.23932321071624757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,4,2,128,1,float16,fp8,0,0.43307838439941404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,4,4,128,1,float16,fp8,0,0.21659040451049805
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,4,1,128,1,float16,float16,0,0.2264784097671509
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,4,1,128,1,float16,fp8,0,0.21431999206542968
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,4,2,128,1,float16,float16,0,0.2303999900817871
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,4,2,128,1,float16,fp8,0,0.2086496114730835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,4,2,128,1,fp8,fp8,0,0.2014240026473999
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,4,4,128,1,float16,fp8,0,0.10586400032043457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,4,4,128,1,fp8,fp8,0,0.10538560152053833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,4,1,128,1,fp8,fp8,0,0.17063839435577394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,4,1,128,1,float16,float16,0,0.11559679508209228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,4,1,128,1,float16,fp8,0,0.10493600368499756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,4,1,128,1,fp8,fp8,0,0.10470240116119385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,4,2,128,1,float16,fp8,0,0.1045024037361145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,4,2,128,1,fp8,fp8,0,0.10463039875030518
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,4,1,128,1,float16,float16,0,1.0281231880187989
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,4,2,128,1,float16,float16,0,0.45495038032531737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,4,2,128,1,fp8,fp8,0,0.3948800086975098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,4,4,128,1,fp8,fp8,0,0.20762879848480226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,4,1,128,1,fp8,fp8,0,0.20967841148376465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,4,1,128,1,float16,fp8,0,0.9439056396484375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,4,1,128,1,fp8,fp8,0,0.9028448104858399
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,4,4,128,1,float16,float16,0,0.5712272167205811
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,4,2,128,1,float16,fp8,0,0.9052191734313965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,4,2,128,1,float16,float16,0,1.041598415374756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,4,2,128,1,fp8,fp8,0,0.9072367668151855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,4,4,128,1,float16,fp8,0,0.4913616180419922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,4,4,128,1,fp8,fp8,0,0.47669281959533694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,4,1,128,1,float16,float16,0,0.5223135948181152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,4,1,128,1,fp8,fp8,0,0.4620816230773926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,4,2,128,1,float16,fp8,0,0.46828317642211914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,4,2,128,1,float16,float16,0,0.5332223892211914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,4,2,128,1,float16,float16,0,0.11763999462127686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,4,2,128,1,fp8,fp8,0,0.4624159812927246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,4,4,128,1,float16,float16,0,0.30360639095306396
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,4,4,128,1,float16,fp8,0,0.25444960594177246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,4,4,128,1,fp8,fp8,0,0.24886560440063477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,4,1,128,1,float16,float16,0,0.2750447988510132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,4,1,128,1,float16,fp8,0,0.2376352071762085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,4,2,128,1,float16,fp8,0,0.2438800096511841
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,4,2,128,1,fp8,fp8,0,0.241540789604187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,4,4,128,1,float16,fp8,0,0.13118560314178468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,4,4,128,1,fp8,fp8,0,0.133788800239563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,4,1,128,1,float16,float16,0,0.14402719736099243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,4,1,128,1,float16,fp8,0,0.12767360210418702
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,4,1,128,1,fp8,fp8,0,0.13059040307998657
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,4,2,128,1,float16,float16,0,0.14875199794769287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,4,2,128,1,float16,fp8,0,0.12701120376586914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,4,2,128,1,fp8,fp8,0,0.12949119806289672
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,4,4,128,1,float16,fp8,0,0.07951679825782776
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,4,4,128,1,fp8,fp8,0,0.08055040240287781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,4,1,128,1,float16,float16,0,0.08846240043640137
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,4,1,128,1,float16,fp8,0,0.08139200210571289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,4,1,128,1,fp8,fp8,0,0.08047999739646912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,4,2,128,1,float16,float16,0,0.08897280097007751
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,4,2,128,1,float16,fp8,0,0.07970399856567383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,4,2,128,1,fp8,fp8,0,0.08071039915084839
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,4,4,128,1,float16,float16,0,0.12726240158081054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,4,1,128,1,float16,fp8,0,0.8587072372436524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,4,1,128,1,float16,float16,0,0.9669008255004883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,4,1,128,1,float16,fp8,0,0.4592912197113037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,4,1,128,1,fp8,fp8,0,0.8542736053466797
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,4,2,128,1,float16,fp8,0,0.8540063858032226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,4,1,128,1,fp8,fp8,0,0.23946080207824708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,4,2,128,1,float16,float16,0,0.9838175773620605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,4,2,128,1,float16,float16,0,0.2777343988418579
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,4,4,128,1,float16,float16,0,0.1697424054145813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,4,2,128,1,fp8,fp8,0,0.8608544349670411
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,4,4,128,1,float16,fp8,0,0.4577455997467041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,4,4,128,1,fp8,fp8,0,0.45502557754516604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,4,1,128,1,float16,float16,0,0.48556318283081057
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,4,1,128,1,float16,fp8,0,0.43354239463806155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,4,1,128,1,fp8,fp8,0,0.4330927848815918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,4,4,128,1,float16,float16,0,0.09152160286903381
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,4,2,128,1,float16,float16,0,0.5038000106811523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,4,4,128,1,fp8,fp8,0,0.23342878818511964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,4,4,128,1,float16,float16,0,0.278059196472168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,4,1,128,1,float16,float16,0,0.24841599464416503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,4,2,128,1,float16,fp8,0,0.43594560623168943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,4,1,128,1,fp8,fp8,0,0.22206559181213378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,4,2,128,1,float16,float16,0,0.2592223882675171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,4,2,128,1,float16,fp8,0,0.22488160133361818
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,4,4,128,1,float16,float16,0,0.1427728056907654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,4,4,128,1,float16,fp8,0,0.12500319480895997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,4,4,128,1,fp8,fp8,0,0.1233024001121521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,4,1,128,1,float16,float16,0,0.129585599899292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,4,1,128,1,float16,fp8,0,0.11575520038604736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,4,1,128,1,fp8,fp8,0,0.11526399850845337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,4,2,128,1,float16,float16,0,0.13294240236282348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,4,2,128,1,float16,fp8,0,0.11692800521850585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,4,4,128,1,float16,float16,0,0.07491679787635804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,4,2,128,1,fp8,fp8,0,0.11567360162734985
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,4,4,128,1,float16,fp8,0,0.06308959722518921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,4,4,128,1,fp8,fp8,0,0.06289920210838318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,4,1,128,1,float16,float16,0,0.06688640117645264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,4,1,128,1,float16,fp8,0,0.06089119911193848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,4,1,128,1,fp8,fp8,0,0.0608847975730896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,4,2,128,1,float16,float16,0,0.06749439835548401
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,4,2,128,1,float16,fp8,0,0.06132320165634155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,4,2,128,1,fp8,fp8,0,0.06130399703979492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,4,4,128,1,float16,fp8,0,0.05648959875106811
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,4,4,128,1,fp8,fp8,0,0.0564736008644104
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,4,1,128,1,float16,float16,0,0.06134080290794373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,4,1,128,1,float16,fp8,0,0.058379197120666505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,4,1,128,1,fp8,fp8,0,0.05792639851570129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,4,2,128,1,float16,float16,0,0.061352002620697024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,4,2,128,1,float16,fp8,0,0.057036799192428586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,4,2,128,1,fp8,fp8,0,0.0563152015209198
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,4,1,128,1,float16,fp8,0,0.2222656011581421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,4,2,128,1,fp8,fp8,0,0.43349599838256836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,4,2,128,1,fp8,fp8,0,0.22313919067382812
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,4,1,128,1,float16,float16,0,0.6227663993835449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,4,1,128,1,float16,fp8,0,0.5523007869720459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,4,1,128,1,fp8,fp8,0,0.5554160118103028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,4,2,128,1,float16,float16,0,0.6418384075164795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,4,2,128,1,float16,fp8,0,0.5630911827087403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,4,4,128,1,float16,float16,0,0.35247039794921875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,4,2,128,1,fp8,fp8,0,0.5576335906982421
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,4,4,128,1,float16,fp8,0,0.3050112009048462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,4,4,128,1,float16,float16,0,0.06275039911270142
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,4,1,128,1,float16,float16,0,0.31837759017944334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,4,4,128,1,float16,float16,0,0.5416912078857422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,4,1,128,1,float16,fp8,0,0.2854975938796997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,4,4,128,1,float16,fp8,0,0.23402559757232666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,4,1,128,1,fp8,fp8,0,0.2890991926193237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,4,2,128,1,fp8,fp8,0,0.29105439186096194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,4,2,128,1,float16,float16,0,0.3317296028137207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,4,2,128,1,float16,fp8,0,0.2879168033599854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,4,4,128,1,float16,float16,0,0.1883455991744995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,4,4,128,1,float16,fp8,0,0.15994240045547486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,4,4,128,1,fp8,fp8,0,0.16018879413604736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,4,4,128,1,fp8,fp8,0,0.3042304039001465
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,4,1,128,1,fp8,fp8,0,0.1515712022781372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,4,1,128,1,float16,fp8,0,0.1526975989341736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,4,2,128,1,fp8,fp8,0,0.15340800285339357
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,4,2,128,1,float16,float16,0,0.1757040023803711
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,4,2,128,1,float16,fp8,0,0.15241919755935668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,4,4,128,1,float16,fp8,0,0.0861728012561798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,4,4,128,1,fp8,fp8,0,0.08613439798355102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,4,4,128,1,float16,float16,0,0.10199040174484253
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,4,1,128,1,float16,float16,0,0.09202560186386108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,4,1,128,1,float16,fp8,0,0.08294079899787903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,4,2,128,1,float16,float16,0,0.0946672022342682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,4,2,128,1,float16,fp8,0,0.08259360194206238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,4,2,128,1,fp8,fp8,0,0.08312000036239624
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,4,4,128,1,float16,float16,0,0.05563520193099976
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,4,4,128,1,fp8,fp8,0,0.048430401086807254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,4,1,128,1,float16,fp8,0,0.04837760031223297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,4,1,128,1,fp8,fp8,0,0.04856959879398346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,4,2,128,1,float16,fp8,0,0.048260799050331114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,4,2,128,1,float16,float16,0,0.05252640247344971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,4,2,128,1,fp8,fp8,0,0.04836159944534302
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,4,4,128,1,float16,fp8,0,0.04639680087566376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,4,4,128,1,fp8,fp8,0,0.046454399824142456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,4,1,128,1,float16,float16,0,0.039129599928855896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,4,1,128,1,float16,fp8,0,0.03738400042057037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,4,1,128,1,fp8,fp8,0,0.03741439878940582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,4,2,128,1,float16,float16,0,0.04572480022907257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,4,2,128,1,float16,fp8,0,0.044206398725509646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,4,2,128,1,fp8,fp8,0,0.04427199959754944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,4,1,128,1,float16,fp8,0,0.5608655929565429
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,4,1,128,1,float16,float16,0,0.6207632064819336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,4,1,128,1,fp8,fp8,0,0.5650032043457032
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,4,1,128,1,float16,float16,0,0.17217119932174682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,4,2,128,1,float16,fp8,0,0.5713551998138428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,4,2,128,1,fp8,fp8,0,0.5692895889282227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,4,2,128,1,float16,float16,0,0.6448336124420166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,4,1,128,1,fp8,fp8,0,0.08262879848480224
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,4,4,128,1,float16,fp8,0,0.04916000068187713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,4,4,128,1,float16,float16,0,0.04952319860458374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,4,1,128,1,float16,float16,0,0.32081758975982666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,4,1,128,1,float16,fp8,0,0.29077439308166503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,4,2,128,1,float16,float16,0,0.3316335916519165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,4,2,128,1,float16,fp8,0,0.2947168111801147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,4,2,128,1,fp8,fp8,0,0.2930864095687866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,4,4,128,1,float16,float16,0,0.19337279796600343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,4,4,128,1,float16,fp8,0,0.1659343957901001
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,4,4,128,1,fp8,fp8,0,0.16476960182189943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,4,1,128,1,float16,float16,0,0.17191200256347655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,4,1,128,1,float16,fp8,0,0.1546831965446472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,4,1,128,1,fp8,fp8,0,0.15357439517974852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,4,2,128,1,float16,float16,0,0.17759040594100953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,4,2,128,1,float16,fp8,0,0.1564255952835083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,4,2,128,1,fp8,fp8,0,0.15579999685287477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,4,4,128,1,float16,float16,0,0.1071023941040039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,4,4,128,1,fp8,fp8,0,0.09172160029411316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,4,1,128,1,float16,float16,0,0.094760000705719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,4,1,128,1,float16,fp8,0,0.08533120155334473
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,4,1,128,1,fp8,fp8,0,0.08547359704971313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,4,2,128,1,float16,float16,0,0.09818400144577026
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,4,2,128,1,float16,fp8,0,0.08642879724502564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,4,2,128,1,fp8,fp8,0,0.08661440014839172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,4,4,128,1,float16,float16,0,0.06341919898986817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,4,4,128,1,float16,fp8,0,0.31366720199584963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,4,4,128,1,float16,fp8,0,0.05259360074996948
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,4,4,128,1,fp8,fp8,0,0.05280960202217102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,4,4,128,1,fp8,fp8,0,0.3124351978302002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,4,1,128,1,float16,fp8,0,0.05199360251426697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,4,1,128,1,fp8,fp8,0,0.05213119983673096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,4,2,128,1,float16,float16,0,0.057011198997497556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,4,2,128,1,float16,fp8,0,0.05221760272979736
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,4,2,128,1,fp8,fp8,0,0.052260798215866086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,4,4,128,1,float16,float16,0,0.03949120044708252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,4,4,128,1,float16,fp8,0,0.035867199301719666
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,4,4,128,1,fp8,fp8,0,0.0364656001329422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,4,1,128,1,float16,float16,0,0.03790720105171204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,4,1,128,1,float16,fp8,0,0.03550080060958862
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,4,1,128,1,fp8,fp8,0,0.035841599106788635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,4,2,128,1,float16,float16,0,0.03832319974899292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,4,2,128,1,float16,fp8,0,0.0356687992811203
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,4,2,128,1,fp8,fp8,0,0.035662400722503665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,4,4,128,1,float16,float16,0,0.030745598673820495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,4,4,128,1,float16,fp8,0,0.029491201043128967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,4,1,128,1,float16,float16,0,0.030244800448417663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,4,1,128,1,float16,fp8,0,0.02945440113544464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,4,1,128,1,fp8,fp8,0,0.0293071985244751
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,4,2,128,1,float16,float16,0,0.03017120063304901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,4,4,128,1,float16,fp8,0,0.09257919788360595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,4,2,128,1,float16,fp8,0,0.02922559976577759
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,4,2,128,1,fp8,fp8,0,0.029099199175834655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,4,1,128,1,float16,float16,0,0.39677441120147705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,4,1,128,1,float16,fp8,0,0.3656816005706787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,4,4,128,1,float16,float16,0,0.36784000396728517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,4,1,128,1,fp8,fp8,0,0.365447998046875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,4,1,128,1,float16,float16,0,0.05594720244407654
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,4,2,128,1,float16,float16,0,0.4145520210266113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,4,1,128,1,fp8,fp8,0,0.2908655881881714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,4,2,128,1,float16,fp8,0,0.3697648048400879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,4,4,128,1,float16,fp8,0,0.20786559581756592
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,4,2,128,1,fp8,fp8,0,0.3718400001525879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,4,4,128,1,fp8,fp8,0,0.20706400871276856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,4,1,128,1,float16,fp8,0,0.19117599725723267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,4,1,128,1,fp8,fp8,0,0.19177119731903075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,4,2,128,1,float16,float16,0,0.21682240962982177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,4,2,128,1,float16,fp8,0,0.1944416046142578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,4,2,128,1,fp8,fp8,0,0.1947808027267456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,4,4,128,1,fp8,fp8,0,0.02948960065841675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,4,4,128,1,float16,float16,0,0.1306447982788086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,4,4,128,1,float16,fp8,0,0.11271359920501708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,4,4,128,1,fp8,fp8,0,0.11212799549102784
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,4,1,128,1,float16,float16,0,0.11367520093917846
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,4,1,128,1,fp8,fp8,0,0.10343999862670898
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,4,2,128,1,float16,float16,0,0.11795680522918701
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,4,2,128,1,fp8,fp8,0,0.10532480478286743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,4,4,128,1,float16,float16,0,0.07468159794807434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,4,4,128,1,float16,fp8,0,0.06483200192451477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,4,4,128,1,fp8,fp8,0,0.06455199718475342
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,4,1,128,1,float16,float16,0,0.0641759991645813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,4,1,128,1,float16,fp8,0,0.05973759889602661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,4,1,128,1,fp8,fp8,0,0.05973280072212219
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,4,2,128,1,float16,float16,0,0.06803039908409118
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,4,2,128,1,float16,fp8,0,0.05976639986038208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,4,2,128,1,fp8,fp8,0,0.05953279733657837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,4,4,128,1,float16,float16,0,0.04257600009441376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,4,4,128,1,float16,fp8,0,0.03834719955921173
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,4,1,128,1,float16,float16,0,0.03890079855918884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,4,1,128,1,float16,fp8,0,0.03781920075416565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,4,1,128,1,fp8,fp8,0,0.037571200728416444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,4,2,128,1,float16,float16,0,0.03910239934921265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,4,2,128,1,fp8,fp8,0,0.03786720037460327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,4,4,128,1,float16,float16,0,0.037371200323104856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,4,4,128,1,float16,fp8,0,0.036057600378990175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,4,4,128,1,fp8,fp8,0,0.03607200086116791
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,4,1,128,1,float16,float16,0,0.03455199897289276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,4,4,128,1,float16,float16,0,0.24238719940185546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,4,1,128,1,float16,fp8,0,0.03442560136318207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,4,1,128,1,fp8,fp8,0,0.03432640135288238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,4,2,128,1,float16,fp8,0,0.03587839901447296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,4,2,128,1,fp8,fp8,0,0.0358240008354187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,4,4,128,1,float16,float16,0,0.02438559979200363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,4,4,128,1,float16,fp8,0,0.02353920042514801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,4,4,128,1,fp8,fp8,0,0.023632000386714935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,4,1,128,1,float16,float16,0,0.022942399978637694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,4,1,128,1,float16,fp8,0,0.023311999440193177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,4,1,128,1,float16,fp8,0,0.10395519733428955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,4,1,128,1,fp8,fp8,0,0.023228800296783446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,4,2,128,1,float16,fp8,0,0.10578080415725707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,4,2,128,1,float16,fp8,0,0.023489600419998168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,4,2,128,1,fp8,fp8,0,0.02342880070209503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,4,1,128,1,float16,float16,0,0.42378082275390627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,4,1,128,1,float16,fp8,0,0.39953439235687255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,4,1,128,1,float16,float16,0,0.05225440263748169
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,4,1,128,1,fp8,fp8,0,0.3986032009124756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,4,2,128,1,float16,float16,0,0.4471759796142578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,4,2,128,1,float16,fp8,0,0.4057024002075195
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,4,2,128,1,fp8,fp8,0,0.4066864013671875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,4,4,128,1,float16,float16,0,0.2651855945587158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,4,2,128,1,float16,float16,0,0.03627359867095947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,4,4,128,1,float16,fp8,0,0.23007841110229493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,4,1,128,1,float16,float16,0,0.2106112003326416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,4,1,128,1,float16,float16,0,0.22228798866271973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,4,4,128,1,fp8,fp8,0,0.2294991970062256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,4,1,128,1,float16,fp8,0,0.20766079425811768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,4,1,128,1,fp8,fp8,0,0.2075119972229004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,4,2,128,1,float16,fp8,0,0.21075839996337892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,4,2,128,1,fp8,fp8,0,0.21064479351043702
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,4,4,128,1,float16,float16,0,0.14169119596481322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,4,4,128,1,float16,fp8,0,0.12329280376434326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,4,2,128,1,float16,float16,0,0.023177599906921385
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,4,4,128,1,fp8,fp8,0,0.12353600263595581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,4,1,128,1,float16,float16,0,0.11939040422439576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,4,1,128,1,float16,fp8,0,0.11081600189208984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,4,1,128,1,fp8,fp8,0,0.11122720241546631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,4,2,128,1,float16,float16,0,0.124617600440979
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,4,2,128,1,fp8,fp8,0,0.11362080574035645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,4,4,128,1,float16,float16,0,0.07855679988861083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,4,4,128,1,float16,fp8,0,0.07037119865417481
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,4,1,128,1,float16,float16,0,0.06696320176124573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,4,4,128,1,fp8,fp8,0,0.03806079924106598
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,4,1,128,1,fp8,fp8,0,0.06153759956359863
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,4,2,128,1,float16,float16,0,0.07013599872589112
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,4,2,128,1,float16,fp8,0,0.06316319704055787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,4,2,128,1,fp8,fp8,0,0.06291040182113647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,4,2,128,1,float16,fp8,0,0.03776000142097473
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,4,4,128,1,float16,fp8,0,0.038980799913406375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,4,4,128,1,fp8,fp8,0,0.03884479999542236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,4,1,128,1,float16,float16,0,0.04033119976520538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,4,1,128,1,float16,fp8,0,0.03863680064678192
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,4,1,128,1,fp8,fp8,0,0.03861919939517975
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,4,2,128,1,float16,float16,0,0.04097279906272888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,4,2,128,1,float16,fp8,0,0.03855839967727661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,4,2,128,1,fp8,fp8,0,0.03884640038013458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,4,4,128,1,float16,float16,0,0.026907199621200563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,4,4,128,1,float16,fp8,0,0.025161600112915038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,4,4,128,1,fp8,fp8,0,0.024903999269008638
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,4,1,128,1,float16,float16,0,0.02569440007209778
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,4,1,128,1,float16,fp8,0,0.02484000027179718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,4,1,128,1,fp8,fp8,0,0.02493920028209686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,4,2,128,1,float16,float16,0,0.02568320035934448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,4,2,128,1,float16,fp8,0,0.024719999730587007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,4,2,128,1,fp8,fp8,0,0.02465600073337555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,4,4,128,1,float16,float16,0,0.02460319995880127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,4,4,128,1,float16,fp8,0,0.024460799992084503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,4,4,128,1,fp8,fp8,0,0.024483199417591094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,4,1,128,1,float16,float16,0,0.02396000027656555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,4,1,128,1,float16,fp8,0,0.02436159998178482
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,4,1,128,1,fp8,fp8,0,0.024353599548339842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,4,2,128,1,float16,float16,0,0.23303840160369874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,4,2,128,1,float16,float16,0,0.024161599576473236
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,4,2,128,1,float16,fp8,0,0.024120000004768372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,4,2,128,1,fp8,fp8,0,0.02417919933795929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,4,4,128,1,float16,fp8,0,0.01998399943113327
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,4,4,128,1,float16,float16,0,0.01992959976196289
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,4,4,128,1,fp8,fp8,0,0.0198512002825737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,4,1,128,1,float16,float16,0,0.019577600061893463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,4,1,128,1,float16,fp8,0,0.019497600197792054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,4,1,128,1,fp8,fp8,0,0.019512000679969787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,4,2,128,1,float16,float16,0,0.020052799582481386
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,4,2,128,1,fp8,fp8,0,0.019577600061893463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,4,2,128,1,float16,fp8,0,0.11367679834365844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,4,1,128,1,float16,float16,0,0.27861919403076174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,4,4,128,1,fp8,fp8,0,0.07000799775123596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,4,1,128,1,float16,fp8,0,0.061596798896789554
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,4,1,128,1,fp8,fp8,0,0.2708976030349731
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,4,2,128,1,float16,float16,0,0.3009344100952148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,4,2,128,1,float16,fp8,0,0.28044800758361815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,4,4,128,1,float16,float16,0,0.047886401414871216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,4,2,128,1,fp8,fp8,0,0.280513596534729
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,4,4,128,1,fp8,fp8,0,0.16804800033569336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,4,1,128,1,float16,float16,0,0.1475648045539856
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,4,1,128,1,float16,fp8,0,0.14275519847869872
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,4,1,128,1,fp8,fp8,0,0.14233440160751343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,4,2,128,1,float16,fp8,0,0.14765280485153198
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,4,4,128,1,float16,float16,0,0.10304800271987916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,4,4,128,1,float16,fp8,0,0.09169600009918213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,4,4,128,1,fp8,fp8,0,0.09072960019111634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,4,1,128,1,float16,float16,0,0.0814512014389038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,4,1,128,1,float16,fp8,0,0.07802559733390808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,4,1,128,1,fp8,fp8,0,0.07824640274047852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,4,2,128,1,float16,float16,0,0.08673440217971802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,4,2,128,1,float16,fp8,0,0.08097440004348755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,4,2,128,1,fp8,fp8,0,0.08099200129508972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,4,4,128,1,float16,fp8,0,0.05265600085258484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,4,4,128,1,fp8,fp8,0,0.05250399708747864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,4,1,128,1,float16,float16,0,0.04709759950637817
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,4,2,128,1,float16,fp8,0,0.01971520036458969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,4,1,128,1,float16,fp8,0,0.04354240000247955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,4,1,128,1,fp8,fp8,0,0.04345119893550873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,4,2,128,1,float16,float16,0,0.05081920027732849
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,4,2,128,1,float16,fp8,0,0.04588960111141205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,4,1,128,1,float16,fp8,0,0.27069759368896484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,4,4,128,1,float16,float16,0,0.03559840023517609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,4,4,128,1,fp8,fp8,0,0.029444798827171326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,4,1,128,1,float16,float16,0,0.027963200211524965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,4,1,128,1,float16,fp8,0,0.02757279872894287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,4,1,128,1,fp8,fp8,0,0.02747359871864319
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,4,2,128,1,float16,float16,0,0.028035199642181395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,4,4,128,1,float16,fp8,0,0.1679487943649292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,4,4,128,1,float16,float16,0,0.1914240002632141
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,4,2,128,1,fp8,fp8,0,0.027780801057815552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,4,4,128,1,float16,float16,0,0.019787199795246124
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,4,4,128,1,float16,fp8,0,0.019049599766731262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,4,4,128,1,fp8,fp8,0,0.01891999989748001
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,4,2,128,1,float16,float16,0,0.15839999914169312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,4,1,128,1,float16,float16,0,0.01799360066652298
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,4,2,128,1,fp8,fp8,0,0.14778720140457152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,4,1,128,1,float16,fp8,0,0.018503999710083006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,4,2,128,1,float16,float16,0,0.01825920045375824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,4,2,128,1,fp8,fp8,0,0.018632000684738158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,4,4,128,1,float16,float16,0,0.01788319945335388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,4,4,128,1,float16,fp8,0,0.01796640008687973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,4,4,128,1,fp8,fp8,0,0.01791200041770935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,4,1,128,1,float16,float16,0,0.017015999555587767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,4,4,128,1,float16,float16,0,0.059380799531936646
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,4,1,128,1,fp8,fp8,0,0.01733600050210953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,4,2,128,1,float16,float16,0,0.01717599928379059
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,4,2,128,1,float16,fp8,0,0.017668800055980684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,4,2,128,1,fp8,fp8,0,0.0174687996506691
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,4,4,128,1,float16,float16,0,0.01716960072517395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,4,4,128,1,float16,fp8,0,0.017664000391960144
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,4,4,128,1,fp8,fp8,0,0.017455999553203583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,4,2,128,1,fp8,fp8,0,0.046137601137161255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,4,1,128,1,float16,float16,0,0.016766400635242464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,4,1,128,1,fp8,fp8,0,0.017212800681591034
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,4,1,128,1,float16,fp8,0,0.017291200160980225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,4,2,128,1,float16,float16,0,0.016924799978733064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,4,2,128,1,float16,fp8,0,0.017292800545692443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,4,2,128,1,fp8,fp8,0,0.01721920073032379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,4,4,128,1,float16,float16,0,0.01635040044784546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,4,4,128,1,float16,fp8,0,0.01704320013523102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,4,4,128,1,fp8,fp8,0,0.017140799760818483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,4,1,128,1,float16,float16,0,0.016152000427246092
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,4,1,128,1,float16,fp8,0,0.01680160015821457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,4,1,128,1,fp8,fp8,0,0.016766400635242464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,4,2,128,1,float16,float16,0,0.016622400283813475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,4,2,128,1,float16,fp8,0,0.027489599585533143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,4,2,128,1,float16,fp8,0,0.016953599452972413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,4,2,128,1,fp8,fp8,0,0.017083199322223665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,4,1,128,1,float16,float16,0,0.12398240566253663
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,4,1,128,1,fp8,fp8,0,0.12516800165176392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,4,2,128,1,float16,float16,0,0.13630399703979493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,4,2,128,1,float16,fp8,0,0.130132794380188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,4,4,128,1,float16,float16,0,0.0936735987663269
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,4,2,128,1,float16,fp8,0,0.018705600500106813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,4,4,128,1,float16,fp8,0,0.07941280007362365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,4,4,128,1,fp8,fp8,0,0.07902079820632935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,4,1,128,1,float16,fp8,0,0.06877120137214661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,4,1,128,1,fp8,fp8,0,0.06864799857139588
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,4,2,128,1,float16,float16,0,0.07583360075950622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,4,2,128,1,float16,fp8,0,0.07128639817237854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,4,2,128,1,fp8,fp8,0,0.0717408001422882
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,4,4,128,1,float16,float16,0,0.053420799970626834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,4,4,128,1,float16,fp8,0,0.045291200280189514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,4,4,128,1,fp8,fp8,0,0.04522239863872528
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,4,1,128,1,float16,float16,0,0.04023520052433014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,4,1,128,1,float16,fp8,0,0.03838399946689606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,4,4,128,1,float16,fp8,0,0.029452800750732422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,4,2,128,1,float16,float16,0,0.043716800212860105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,4,2,128,1,float16,fp8,0,0.03956319987773895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,4,2,128,1,fp8,fp8,0,0.03940320014953613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,4,4,128,1,float16,float16,0,0.03056960105895996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,4,4,128,1,float16,fp8,0,0.024500800669193266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,4,4,128,1,fp8,fp8,0,0.024296000599861145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,4,1,128,1,float16,fp8,0,0.1253327965736389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,4,1,128,1,float16,float16,0,0.023080000281333925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,4,1,128,1,fp8,fp8,0,0.02340960055589676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,4,2,128,1,float16,float16,0,0.023286400735378264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,4,2,128,1,float16,fp8,0,0.02401120066642761
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,4,4,128,1,float16,float16,0,0.016672000288963318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,4,1,128,1,fp8,fp8,0,0.01841920018196106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,4,4,128,1,float16,fp8,0,0.015934400260448456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,4,4,128,1,fp8,fp8,0,0.01594240069389343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,4,1,128,1,float16,float16,0,0.01525920033454895
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,4,1,128,1,fp8,fp8,0,0.015331199765205384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,4,2,128,1,float16,float16,0,0.015449599921703338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,4,2,128,1,float16,fp8,0,0.01581120043992996
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,4,2,128,1,fp8,fp8,0,0.015505599975585937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,4,4,128,1,float16,float16,0,0.014955200254917145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,4,4,128,1,float16,fp8,0,0.014920000731945039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,4,4,128,1,fp8,fp8,0,0.014865599572658539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,4,1,128,1,float16,float16,0,0.013929599523544311
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,4,1,128,1,float16,fp8,0,0.014343999326229095
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,4,1,128,1,fp8,fp8,0,0.014407999813556671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,4,2,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,4,2,128,1,float16,fp8,0,0.01467359960079193
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,4,2,128,1,fp8,fp8,0,0.014788800477981567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,4,1,128,1,fp8,fp8,0,0.03796800076961517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,4,4,128,1,float16,fp8,0,0.01441120058298111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,4,4,128,1,fp8,fp8,0,0.014448000490665436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,4,1,128,1,float16,float16,0,0.013737599551677703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,4,1,128,1,float16,fp8,0,0.014078399538993836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,4,1,128,1,fp8,fp8,0,0.014056000113487243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,4,2,128,1,float16,float16,0,0.013731199502944946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,4,2,128,1,float16,fp8,0,0.014388799667358398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,4,2,128,1,fp8,fp8,0,0.014504000544548035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,4,4,128,1,float16,float16,0,0.013795199990272521
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,4,1,128,1,float16,fp8,0,0.02344000041484833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,4,4,128,1,float16,fp8,0,0.013952000439167023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,4,4,128,1,fp8,fp8,0,0.014076800644397735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,4,1,128,1,float16,float16,0,0.013486400246620178
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,4,1,128,1,float16,fp8,0,0.013899199664592743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,4,2,128,1,fp8,fp8,0,0.12965919971466064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,4,1,128,1,fp8,fp8,0,0.01369599997997284
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,4,2,128,1,float16,fp8,0,0.014008000493049622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,4,2,128,1,float16,float16,0,0.013516800105571746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,4,2,128,1,fp8,fp8,0,0.01420000046491623
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,4,1,128,1,float16,fp8,0,0.01526080071926117
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,4,4,128,1,float16,float16,0,0.01332319974899292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,4,4,128,1,float16,fp8,0,0.013803200423717498
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,4,4,128,1,fp8,fp8,0,0.013916799426078796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,4,1,128,1,float16,fp8,0,0.013443200290203095
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,4,1,128,1,fp8,fp8,0,0.01361600011587143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,4,2,128,1,float16,float16,0,0.01313759982585907
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,4,2,128,1,float16,fp8,0,0.013788799941539764
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,4,1,128,1,float16,fp8,0,0.01764799952507019
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,4,1,128,1,float16,float16,0,0.06711999773979187
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,4,1,128,1,fp8,fp8,0,0.06684160232543945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,4,2,128,1,float16,float16,0,0.0743071973323822
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,4,2,128,1,float16,fp8,0,0.0681119978427887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,4,2,128,1,fp8,fp8,0,0.06817920207977295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,4,4,128,1,float16,float16,0,0.014276799559593201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,4,4,128,1,float16,float16,0,0.0515936017036438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,4,4,128,1,fp8,fp8,0,0.04150559902191162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,4,1,128,1,float16,float16,0,0.037887999415397645
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,4,1,128,1,float16,fp8,0,0.03462400138378143
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,4,1,128,1,fp8,fp8,0,0.03466080129146576
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,4,2,128,1,float16,float16,0,0.04139359891414642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,4,2,128,1,float16,fp8,0,0.03606080114841461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,4,2,128,1,fp8,fp8,0,0.036268800497055054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,4,2,128,1,fp8,fp8,0,0.02464960068464279
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,4,4,128,1,float16,float16,0,0.028832000494003297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,4,4,128,1,fp8,fp8,0,0.02115360051393509
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,4,1,128,1,float16,float16,0,0.020476800203323365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,4,1,128,1,float16,fp8,0,0.02096160054206848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,4,1,128,1,float16,float16,0,0.06916000247001648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,4,1,128,1,fp8,fp8,0,0.020996800065040587
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,4,1,128,1,float16,float16,0,0.013153600692749023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,4,2,128,1,float16,float16,0,0.020528000593185425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,4,2,128,1,fp8,fp8,0,0.020868800580501556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,4,4,128,1,float16,float16,0,0.015795199573040007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,4,4,128,1,float16,fp8,0,0.01478080004453659
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,4,4,128,1,fp8,fp8,0,0.014732800424098969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,4,1,128,1,float16,float16,0,0.014340800046920777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,4,1,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,4,1,128,1,fp8,fp8,0,0.014521600306034088
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,4,2,128,1,float16,float16,0,0.01430719941854477
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,4,1,128,1,float16,fp8,0,0.06664639711380005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,4,2,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,4,4,128,1,float16,float16,0,0.013948799669742584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,4,4,128,1,fp8,fp8,0,0.01345279961824417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,4,1,128,1,float16,float16,0,0.012966400384902954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,4,1,128,1,float16,fp8,0,0.013220800459384919
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,4,1,128,1,fp8,fp8,0,0.013065600395202636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,4,2,128,1,float16,float16,0,0.013145600259304047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,4,2,128,1,float16,fp8,0,0.013206399977207184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,4,2,128,1,fp8,fp8,0,0.013065600395202636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,4,4,128,1,float16,fp8,0,0.04131200015544891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,4,4,128,1,float16,float16,0,0.012854400277137756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,4,4,128,1,float16,fp8,0,0.012427199631929398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,4,4,128,1,fp8,fp8,0,0.013321599364280701
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,4,1,128,1,float16,float16,0,0.012323199957609176
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,4,1,128,1,float16,fp8,0,0.012656000256538392
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,4,1,128,1,fp8,fp8,0,0.01218400001525879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,4,2,128,1,float16,float16,0,0.01228640004992485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,4,2,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,4,4,128,1,float16,float16,0,0.012044800072908401
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,4,4,128,1,float16,fp8,0,0.012300799787044524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,4,4,128,1,fp8,fp8,0,0.012172800302505494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,4,1,128,1,float16,float16,0,0.01207519993185997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,4,4,128,1,float16,fp8,0,0.02122559994459152
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,4,1,128,1,float16,fp8,0,0.012111999839544297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,4,1,128,1,fp8,fp8,0,0.012062399834394454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,4,2,128,1,float16,float16,0,0.012097600102424621
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,4,2,128,1,float16,fp8,0,0.012455999851226807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,4,2,128,1,fp8,fp8,0,0.01207519993185997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,4,4,128,1,float16,fp8,0,0.011992000043392181
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,4,4,128,1,fp8,fp8,0,0.012062399834394454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,4,1,128,1,float16,float16,0,0.011606399714946748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,4,2,128,1,float16,fp8,0,0.021044799685478212
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,4,1,128,1,fp8,fp8,0,0.011801599711179733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,4,2,128,1,float16,fp8,0,0.011801599711179733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,4,2,128,1,fp8,fp8,0,0.01364160031080246
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,4,2,128,1,fp8,fp8,0,0.011798399686813354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,4,4,128,1,float16,float16,0,0.011363200098276138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,4,4,128,1,float16,fp8,0,0.011868800222873687
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,4,4,128,1,fp8,fp8,0,0.011785600334405899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,4,1,128,1,float16,fp8,0,0.011820799857378005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,4,2,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,4,1,128,1,fp8,fp8,0,0.011856000125408172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,4,2,128,1,float16,float16,0,0.011503999680280685
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,4,2,128,1,fp8,fp8,0,0.011694400012493134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,4,1,128,1,float16,float16,0,0.039099198579788205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,4,1,128,1,float16,fp8,0,0.03660959899425507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,4,1,128,1,fp8,fp8,0,0.0364080011844635
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,4,2,128,1,float16,float16,0,0.04279359877109527
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,4,2,128,1,float16,fp8,0,0.03763839900493622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,4,2,128,1,fp8,fp8,0,0.03803519904613495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,4,4,128,1,float16,float16,0,0.031825599074363706
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,4,2,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,4,4,128,1,float16,fp8,0,0.02359199970960617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,4,4,128,1,fp8,fp8,0,0.023769600689411162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,4,1,128,1,float16,float16,0,0.02105119973421097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,4,1,128,1,float16,fp8,0,0.021775999665260316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,4,1,128,1,fp8,fp8,0,0.021775999665260316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,4,2,128,1,float16,float16,0,0.02149440050125122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,4,4,128,1,float16,float16,0,0.011832000315189361
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,4,2,128,1,float16,fp8,0,0.022011199593544008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,4,4,128,1,float16,float16,0,0.017132799327373504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,4,4,128,1,float16,fp8,0,0.01560640037059784
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,4,2,128,1,float16,float16,0,0.011635199934244157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,4,4,128,1,fp8,fp8,0,0.015614399313926696
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,4,1,128,1,float16,float16,0,0.014856000244617463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,4,1,128,1,float16,fp8,0,0.014961600303649902
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,4,1,128,1,float16,float16,0,0.011692799627780914
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,4,1,128,1,fp8,fp8,0,0.014979200065135955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,4,2,128,1,float16,fp8,0,0.014910399913787842
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,4,4,128,1,float16,float16,0,0.012433599680662155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,4,4,128,1,float16,fp8,0,0.013009600341320038
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,4,4,128,1,float16,fp8,0,0.011739200353622437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,4,4,128,1,fp8,fp8,0,0.011377599835395814
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,4,1,128,1,float16,float16,0,0.013129599392414093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,4,1,128,1,float16,fp8,0,0.013340799510478974
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,4,1,128,1,fp8,fp8,0,0.013288000226020813
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,4,2,128,1,float16,float16,0,0.013191999495029449
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,4,2,128,1,float16,fp8,0,0.013142399489879608
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,4,2,128,1,fp8,fp8,0,0.013350400328636169
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,4,4,128,1,float16,float16,0,0.010811199992895126
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,4,4,128,1,float16,fp8,0,0.010699199885129929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,4,4,128,1,fp8,fp8,0,0.010294400155544281
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,4,1,128,1,float16,float16,0,0.01255040019750595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,4,1,128,1,float16,fp8,0,0.012436799705028534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,4,1,128,1,fp8,fp8,0,0.012854400277137756
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,4,2,128,1,float16,float16,0,0.01231359988451004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,4,2,128,1,float16,fp8,0,0.012748800218105316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,4,2,128,1,fp8,fp8,0,0.012625600397586822
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,4,4,128,1,float16,float16,0,0.010732799768447876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,4,4,128,1,float16,fp8,0,0.0102463997900486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,4,4,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,4,1,128,1,float16,float16,0,0.012033600360155106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,4,1,128,1,float16,fp8,0,0.012355200201272964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,4,1,128,1,fp8,fp8,0,0.012297599762678146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,4,2,128,1,float16,float16,0,0.012240000069141388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,4,2,128,1,float16,fp8,0,0.01228479966521263
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,4,2,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,4,4,128,1,float16,float16,0,0.010211200267076493
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,4,4,128,1,float16,fp8,0,0.010080000013113022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,4,4,128,1,fp8,fp8,0,0.00992320030927658
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,4,1,128,1,float16,float16,0,0.01178240031003952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,4,1,128,1,float16,fp8,0,0.012031999975442886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,4,1,128,1,fp8,fp8,0,0.011974400281906128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,4,2,128,1,float16,float16,0,0.011806400120258331
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,4,2,128,1,float16,fp8,0,0.012008000165224075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,4,2,128,1,fp8,fp8,0,0.012028799951076507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,4,4,128,1,float16,float16,0,0.009859199821949004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,4,4,128,1,float16,fp8,0,0.009785600006580353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,4,4,128,1,fp8,fp8,0,0.009545599669218063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,4,1,128,1,float16,float16,0,0.011584000289440155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,4,1,128,1,float16,fp8,0,0.011769600212574005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,4,1,128,1,fp8,fp8,0,0.011817599833011627
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,4,2,128,1,float16,float16,0,0.01141119971871376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,4,2,128,1,float16,fp8,0,0.011883199959993363
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,4,2,128,1,fp8,fp8,0,0.011769600212574005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,4,4,128,1,float16,float16,0,0.009668800234794616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,4,4,128,1,float16,fp8,0,0.009491200000047684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,4,4,128,1,fp8,fp8,0,0.009382399916648864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,4,1,128,1,float16,float16,0,0.011524800211191177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,4,1,128,1,float16,fp8,0,0.011804799735546111
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,4,2,128,1,float16,float16,0,0.011531200259923935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,4,2,128,1,float16,fp8,0,0.011772800236940384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,4,2,128,1,fp8,fp8,0,0.011761599779129028
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,4,1,128,1,float16,float16,0,0.023612800240516662
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,4,1,128,1,float16,fp8,0,0.024993599951267244
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,4,2,128,1,float16,float16,0,0.014843200147151948
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,4,1,128,1,fp8,fp8,0,0.02489600032567978
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,4,2,128,1,fp8,fp8,0,0.015081599354743958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,4,2,128,1,float16,float16,0,0.02661119997501373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,4,2,128,1,float16,fp8,0,0.02598559856414795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,4,2,128,1,float16,fp8,0,0.011801599711179733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,4,2,128,1,fp8,fp8,0,0.025963199138641358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,4,4,128,1,float16,fp8,0,0.022361600399017335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,4,4,128,1,fp8,fp8,0,0.022358399629592896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,4,1,128,1,float16,fp8,0,0.016491200029850005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,4,2,128,1,float16,float16,0,0.01687519997358322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,4,1,128,1,fp8,fp8,0,0.016521599888801575
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,4,2,128,1,float16,fp8,0,0.016630400717258454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,4,2,128,1,fp8,fp8,0,0.016814400255680085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,4,4,128,1,float16,float16,0,0.015547199547290802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,4,4,128,1,float16,fp8,0,0.014556799829006196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,4,4,128,1,fp8,fp8,0,0.014801600575447082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,4,1,128,1,float16,float16,0,0.013441599905490875
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,4,1,128,1,float16,fp8,0,0.01403679996728897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,4,1,128,1,fp8,fp8,0,0.013724799454212188
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,4,2,128,1,float16,float16,0,0.012011200189590454
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,4,2,128,1,float16,fp8,0,0.011828800290822982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,4,2,128,1,fp8,fp8,0,0.01159999966621399
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,4,4,128,1,float16,float16,0,0.011577600240707397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,4,4,128,1,float16,fp8,0,0.011169599741697312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,4,4,128,1,fp8,fp8,0,0.011099199950695037
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,4,1,128,1,float16,float16,0,0.012984000146389008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,4,1,128,1,float16,fp8,0,0.0131632000207901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,4,1,128,1,fp8,fp8,0,0.01324319988489151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,4,2,128,1,float16,float16,0,0.011395200341939925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,4,2,128,1,fp8,fp8,0,0.011169599741697312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,4,4,128,1,float16,float16,0,0.011124800145626067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,4,4,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,4,4,128,1,fp8,fp8,0,0.010326399654150008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,4,1,128,1,float16,float16,0,0.012051200121641159
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,4,1,128,1,float16,fp8,0,0.012247999757528305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,4,1,128,1,fp8,fp8,0,0.012265600264072418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,4,2,128,1,float16,float16,0,0.010289599746465683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,4,1,128,1,fp8,fp8,0,0.011878400295972823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,4,2,128,1,fp8,fp8,0,0.022519999742507936
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,4,2,128,1,fp8,fp8,0,0.010899200290441512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,4,4,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,4,4,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,4,4,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,4,1,128,1,float16,float16,0,0.011811199784278869
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,4,1,128,1,float16,fp8,0,0.012086399644613267
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,4,1,128,1,fp8,fp8,0,0.012052799761295318
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,4,2,128,1,float16,fp8,0,0.009851200133562088
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,4,2,128,1,fp8,fp8,0,0.009910400211811065
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,4,4,128,1,float16,float16,0,0.010047999769449234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,4,4,128,1,float16,float16,0,0.0237296000123024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,4,4,128,1,float16,fp8,0,0.009694399684667588
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,4,1,128,1,float16,float16,0,0.01610559970140457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,4,1,128,1,float16,float16,0,0.011553599685430526
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,4,1,128,1,fp8,fp8,0,0.011876799911260606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,4,2,128,1,float16,float16,0,0.009987200051546097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,4,2,128,1,float16,fp8,0,0.009708800166845322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,4,2,128,1,fp8,fp8,0,0.009719999879598618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,4,4,128,1,float16,float16,0,0.009836799651384353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,4,4,128,1,float16,fp8,0,0.009676799923181535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,4,4,128,1,fp8,fp8,0,0.009617599844932555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,4,1,128,1,float16,float16,0,0.01138719990849495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,4,1,128,1,float16,fp8,0,0.011675199866294861
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,4,1,128,1,fp8,fp8,0,0.011665599793195725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,4,2,128,1,float16,fp8,0,0.011297599971294403
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,4,2,128,1,float16,float16,0,0.009665600210428237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,4,2,128,1,fp8,fp8,0,0.009385599941015243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,4,4,128,1,float16,float16,0,0.009763199836015701
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,4,4,128,1,float16,fp8,0,0.009536000341176987
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,4,4,128,1,fp8,fp8,0,0.009550400078296661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,4,1,128,1,float16,float16,0,0.011590400338172912
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,4,1,128,1,float16,fp8,0,0.011849600076675414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,4,1,128,1,fp8,fp8,0,0.011734399944543839
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,4,2,128,1,float16,float16,0,0.009708800166845322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,4,2,128,1,float16,fp8,0,0.00947680026292801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,4,2,128,1,fp8,fp8,0,0.009473600238561631
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,4,2,128,1,float16,fp8,0,0.010249599814414978
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,4,1,128,1,float16,fp8,0,0.011998400092124939
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,4,1,128,1,float16,fp8,0,0.018855999410152435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,4,1,128,1,fp8,fp8,0,0.018875199556350707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,4,2,128,1,float16,fp8,0,0.02545439898967743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,4,2,128,1,float16,float16,0,0.01003360003232956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,4,2,128,1,fp8,fp8,0,0.024564799666404725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,4,4,128,1,float16,float16,0,0.022305600345134735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,4,4,128,1,float16,fp8,0,0.021873599290847777
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,4,1,128,1,float16,float16,0,0.013595199584960938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,4,4,128,1,fp8,fp8,0,0.010041599720716476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,4,1,128,1,float16,fp8,0,0.011646399646997452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,4,2,128,1,float16,float16,0,0.01605439931154251
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,4,2,128,1,float16,fp8,0,0.01576640009880066
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,4,2,128,1,fp8,fp8,0,0.015697599947452547
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,4,4,128,1,float16,float16,0,0.014998400211334228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,4,4,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,4,4,128,1,fp8,fp8,0,0.014444799721240997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,4,1,128,1,float16,float16,0,0.011294399946928024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,4,1,128,1,float16,fp8,0,0.01165120005607605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,4,1,128,1,fp8,fp8,0,0.011363200098276138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,4,2,128,1,float16,float16,0,0.012070400267839431
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,4,2,128,1,float16,fp8,0,0.009556800127029419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,4,2,128,1,float16,fp8,0,0.01183359995484352
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,4,4,128,1,float16,float16,0,0.011233600229024887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,4,4,128,1,float16,fp8,0,0.011006399989128113
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,4,4,128,1,fp8,fp8,0,0.011003199964761734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,4,1,128,1,float16,float16,0,0.010902400314807891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,4,1,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,4,1,128,1,fp8,fp8,0,0.010924799740314484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,4,2,128,1,float16,float16,0,0.011072000116109848
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,4,2,128,1,float16,fp8,0,0.01098880022764206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,4,1,128,1,float16,float16,0,0.018940800428390504
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,4,2,128,1,fp8,fp8,0,0.010967999696731567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,4,4,128,1,float16,fp8,0,0.010134399682283402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,4,4,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,4,2,128,1,float16,float16,0,0.024872000515460967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,4,4,128,1,fp8,fp8,0,0.009990400075912476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,4,1,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,4,1,128,1,float16,fp8,0,0.00989919975399971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,4,1,128,1,fp8,fp8,0,0.010036800056695938
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,4,2,128,1,float16,fp8,0,0.009905599802732468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,4,2,128,1,fp8,fp8,0,0.010235200077295304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,4,4,128,1,float16,float16,0,0.009942399710416794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,4,4,128,1,float16,fp8,0,0.01021919995546341
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,4,4,128,1,fp8,fp8,0,0.009734400361776353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,4,4,128,1,fp8,fp8,0,0.021510399878025055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,4,1,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,4,1,128,1,float16,fp8,0,0.0098191998898983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,4,1,128,1,float16,fp8,0,0.013752000033855438
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,4,2,128,1,float16,float16,0,0.010168000310659408
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,4,1,128,1,fp8,fp8,0,0.009803199768066406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,4,1,128,1,fp8,fp8,0,0.013255999982357025
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,4,2,128,1,fp8,fp8,0,0.00981760025024414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,4,4,128,1,float16,fp8,0,0.009625600278377533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,4,1,128,1,float16,float16,0,0.009747199714183807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,4,1,128,1,float16,fp8,0,0.009494400024414063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,4,2,128,1,float16,float16,0,0.00963039994239807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,4,2,128,1,float16,fp8,0,0.009374400228261947
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,4,2,128,1,fp8,fp8,0,0.009446399658918381
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,4,4,128,1,float16,float16,0,0.009703999757766724
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,4,4,128,1,float16,fp8,0,0.009444800019264222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,4,4,128,1,fp8,fp8,0,0.009564799815416336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,4,1,128,1,float16,float16,0,0.00979520007967949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,4,1,128,1,float16,fp8,0,0.009699200093746186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,4,1,128,1,fp8,fp8,0,0.009679999947547913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,4,2,128,1,float16,float16,0,0.009691199660301209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,4,2,128,1,float16,fp8,0,0.009487999975681305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,4,2,128,1,fp8,fp8,0,0.00939679965376854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,4,4,128,1,float16,float16,0,0.009611199796199798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,4,4,128,1,float16,fp8,0,0.009382399916648864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,4,4,128,1,fp8,fp8,0,0.009388799965381622
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,4,1,128,1,float16,float16,0,0.00955360010266304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,4,1,128,1,float16,fp8,0,0.009487999975681305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,4,1,128,1,fp8,fp8,0,0.009521599858999252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,4,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,4,2,128,1,float16,float16,0,0.00987040027976036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,4,2,128,1,float16,fp8,0,0.00963039994239807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,4,1,128,1,float16,float16,0,0.01830720007419586
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,4,1,128,1,float16,fp8,0,0.01820800006389618
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,4,1,128,1,fp8,fp8,0,0.018049600720405578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,4,2,128,1,float16,fp8,0,0.009694399684667588
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,4,2,128,1,float16,float16,0,0.02372799962759018
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,4,4,128,1,float16,float16,0,0.009806399792432785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,4,2,128,1,fp8,fp8,0,0.023639999330043793
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,4,1,128,1,fp8,fp8,0,0.00960640013217926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,4,4,128,1,fp8,fp8,0,0.021139200031757354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,4,1,128,1,float16,float16,0,0.013059200346469879
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,4,1,128,1,float16,fp8,0,0.012919999659061432
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,4,1,128,1,fp8,fp8,0,0.012943999469280243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,4,2,128,1,float16,float16,0,0.015558399260044098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,4,2,128,1,float16,fp8,0,0.015518400073051452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,4,2,128,1,fp8,fp8,0,0.015566399693489075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,4,4,128,1,float16,float16,0,0.014127999544143677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,4,4,128,1,float16,fp8,0,0.01401119977235794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,4,4,128,1,fp8,fp8,0,0.014017599821090698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,4,2,128,1,fp8,fp8,0,0.0117807999253273
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,4,1,128,1,float16,float16,0,0.011164800077676774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,4,1,128,1,fp8,fp8,0,0.011142399907112122
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,4,2,128,1,float16,float16,0,0.011318399757146835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,4,2,128,1,float16,fp8,0,0.01130559965968132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,4,2,128,1,fp8,fp8,0,0.011294399946928024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,4,4,128,1,float16,float16,0,0.010900799930095673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,4,4,128,1,float16,fp8,0,0.010876800119876861
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,4,4,128,1,fp8,fp8,0,0.010916800051927567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,4,1,128,1,float16,float16,0,0.010849600285291671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,4,1,128,1,float16,fp8,0,0.010761599987745285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,4,1,128,1,fp8,fp8,0,0.010751999914646149
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,4,2,128,1,float16,float16,0,0.01080320030450821
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,4,2,128,1,float16,fp8,0,0.010790400207042694
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,4,2,128,1,fp8,fp8,0,0.010604800283908844
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,4,4,128,1,float16,float16,0,0.010014399886131287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,4,2,128,1,fp8,fp8,0,0.009603200107812881
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,4,4,128,1,float16,fp8,0,0.009942399710416794
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,4,1,128,1,float16,float16,0,0.009692800045013428
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,4,1,128,1,float16,fp8,0,0.00955199971795082
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,4,1,128,1,fp8,fp8,0,0.009683199971914292
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,4,2,128,1,float16,float16,0,0.009881599992513656
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,4,2,128,1,float16,fp8,0,0.009934400022029877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,4,2,128,1,fp8,fp8,0,0.009904000163078307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,4,4,128,1,fp8,fp8,0,0.009548799693584442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,4,4,128,1,float16,fp8,0,0.021289600431919097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,4,4,128,1,float16,fp8,0,0.009944000095129014
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,4,1,128,1,float16,float16,0,0.009950400143861771
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,4,1,128,1,fp8,fp8,0,0.010067199915647506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,4,2,128,1,float16,float16,0,0.009905599802732468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,4,2,128,1,float16,fp8,0,0.0098191998898983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,4,2,128,1,fp8,fp8,0,0.009836799651384353
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,4,4,128,1,float16,float16,0,0.009811200201511383
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,4,4,128,1,float16,fp8,0,0.009516800194978714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,4,4,128,1,fp8,fp8,0,0.009556800127029419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,4,1,128,1,float16,float16,0,0.009471999853849411
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,4,1,128,1,float16,fp8,0,0.009452799707651139
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,4,1,128,1,fp8,fp8,0,0.00947680026292801
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,4,1,128,1,float16,fp8,0,0.01127839982509613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,4,2,128,1,float16,fp8,0,0.009331200271844864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,4,2,128,1,fp8,fp8,0,0.00933919996023178
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,4,4,128,1,float16,float16,0,0.009550400078296661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,4,4,128,1,float16,fp8,0,0.009542399644851684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,4,4,128,1,fp8,fp8,0,0.009694399684667588
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,4,1,128,1,float16,float16,0,0.009505599737167358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,4,1,128,1,float16,fp8,0,0.009510400146245957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,4,1,128,1,fp8,fp8,0,0.009361600130796432
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,4,2,128,1,float16,float16,0,0.009550400078296661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,4,2,128,1,float16,fp8,0,0.009344000369310379
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,4,2,128,1,fp8,fp8,0,0.009371200203895569
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,4,4,128,1,float16,float16,0,0.00944959968328476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,4,4,128,1,float16,fp8,0,0.00941760018467903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,4,4,128,1,fp8,fp8,0,0.009404800087213516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,4,1,128,1,float16,float16,0,0.00958240032196045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,4,4,128,1,fp8,fp8,0,0.010132800042629241
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,4,1,128,1,float16,fp8,0,0.009273599833250046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,4,1,128,1,fp8,fp8,0,0.009272000193595887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,4,2,128,1,float16,float16,0,0.009470400214195252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,4,4,128,1,float16,float16,0,0.010159999877214432
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,4,2,128,1,float16,fp8,0,0.009359999746084213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,4,2,128,1,fp8,fp8,0,0.009561599791049957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,4,4,128,1,fp8,fp8,0,0.00992799997329712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,4,1,128,1,float16,fp8,0,0.00997920036315918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,2,2,128,1,float16,float16,0,0.8724047660827636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,4,2,128,1,float16,float16,0,0.00974079966545105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,2,1,128,1,float16,fp8,0,1.4806559562683106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,2,1,128,1,fp8,fp8,0,1.488304042816162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16384,2,1,128,1,float16,float16,0,1.7711296081542969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,2,2,128,1,float16,fp8,0,0.7742832183837891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,2,2,128,1,fp8,fp8,0,0.7498208045959472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,2,1,128,1,float16,fp8,0,0.7493567943572998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,2,1,128,1,fp8,fp8,0,0.8366800308227539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,2,2,128,1,float16,float16,0,0.44481759071350097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,2,2,128,1,float16,fp8,0,0.37839040756225584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,2,2,128,1,fp8,fp8,0,0.38129758834838867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,2,1,128,1,float16,float16,0,0.44562082290649413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,2,1,128,1,float16,fp8,0,0.3921792030334473
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,2,1,128,1,fp8,fp8,0,0.37637441158294677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,2,2,128,1,float16,float16,0,0.22464640140533448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,4,2,128,1,float16,fp8,0,0.02332800030708313
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,2,2,128,1,float16,fp8,0,0.207260799407959
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,2,2,128,1,fp8,fp8,0,0.20094079971313478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,2,1,128,1,float16,fp8,0,0.19360320568084716
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,2,1,128,1,fp8,fp8,0,0.20868320465087892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,2,1,128,1,float16,fp8,0,0.8825216293334961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,2,2,128,1,float16,float16,0,0.5683296203613282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,2,1,128,1,float16,float16,0,1.0044015884399413
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,2,2,128,1,float16,fp8,0,0.4549920082092285
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,2,2,128,1,fp8,fp8,0,0.4705455780029297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,2,1,128,1,float16,float16,0,0.5272528171539307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,2,1,128,1,float16,fp8,0,0.45401921272277834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,2,2,128,1,float16,float16,0,0.3177903890609741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,2,2,128,1,float16,fp8,0,0.24161601066589355
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,2,1,128,1,float16,float16,0,0.8563967704772949
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,2,1,128,1,float16,fp8,0,0.2415087938308716
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,2,1,128,1,fp8,fp8,0,0.24195680618286133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,2,2,128,1,float16,float16,0,0.16957600116729737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,2,2,128,1,float16,fp8,0,0.14929440021514892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,2,2,128,1,fp8,fp8,0,0.14983999729156494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,4,4,128,1,float16,float16,0,0.021070399880409242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,2,1,128,1,float16,float16,0,0.16870720386505128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,2,1,128,1,float16,float16,0,0.2192944049835205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,2,1,128,1,fp8,fp8,0,0.14924319982528686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,2,1,128,1,float16,float16,0,0.7178719997406006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,2,1,128,1,float16,fp8,0,0.6275087833404541
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,10240,2,1,128,1,fp8,fp8,0,0.6343183994293213
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,12288,2,1,128,1,fp8,fp8,0,0.8729935646057129
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,2,2,128,1,float16,float16,0,0.4083280086517334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,2,2,128,1,fp8,fp8,0,0.3312351942062378
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,2,1,128,1,float16,float16,0,0.3705904006958008
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,2,1,128,1,float16,fp8,0,0.32814080715179444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,2,1,128,1,fp8,fp8,0,0.32986719608306886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,2,2,128,1,float16,float16,0,0.23900799751281737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,2,2,128,1,float16,fp8,0,0.17374080419540405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,2,2,128,1,fp8,fp8,0,0.1750656008720398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,2,1,128,1,fp8,fp8,0,0.46352319717407225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,2,1,128,1,float16,float16,0,0.19487040042877196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,2,1,128,1,float16,fp8,0,0.17397119998931884
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,2,1,128,1,fp8,fp8,0,0.17515840530395507
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,2,2,128,1,float16,float16,0,0.14187999963760375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,2,2,128,1,float16,fp8,0,0.12477920055389405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,2,2,128,1,fp8,fp8,0,0.12459520101547242
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,2,2,128,1,fp8,fp8,0,0.2525343894958496
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,2,1,128,1,float16,float16,0,0.14118080139160155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,2,1,128,1,float16,fp8,0,0.12450239658355713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,2,1,128,1,float16,float16,0,0.275217604637146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,2,2,128,1,float16,float16,0,0.4768064022064209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,2,2,128,1,float16,fp8,0,0.40522241592407227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,2,1,128,1,float16,fp8,0,0.8014032363891601
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,2,1,128,1,float16,float16,0,0.9098336219787597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,8192,2,1,128,1,fp8,fp8,0,0.8022031784057617
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,2,2,128,1,fp8,fp8,0,0.4101151943206787
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,2,1,128,1,float16,float16,0,0.4644495964050293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,2,2,128,1,float16,float16,0,0.24093120098114013
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,2,1,128,1,float16,fp8,0,0.4067471981048584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,2,1,128,1,fp8,fp8,0,0.4274847984313965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,2,2,128,1,float16,fp8,0,0.21003999710083007
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,2,2,128,1,fp8,fp8,0,0.2075727939605713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,2,1,128,1,float16,float16,0,0.23383679389953613
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,2,1,128,1,float16,fp8,0,0.2072160005569458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,2,2,128,1,float16,float16,0,0.12601759433746337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,2,2,128,1,float16,fp8,0,0.10761439800262451
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,2,2,128,1,fp8,fp8,0,0.10864479541778564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,2,1,128,1,float16,float16,0,0.12057440280914307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,2,1,128,1,fp8,fp8,0,0.10614080429077148
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,2,2,128,1,float16,float16,0,0.11293439865112305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,2,2,128,1,float16,fp8,0,0.10295360088348389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,2,2,128,1,fp8,fp8,0,0.10212479829788208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,2,2,128,1,float16,fp8,0,0.32894880771636964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,2,1,128,1,float16,fp8,0,0.10340640544891358
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,2,1,128,1,fp8,fp8,0,0.10216000080108642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,2,1,128,1,float16,fp8,0,0.510923194885254
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,2,1,128,1,fp8,fp8,0,0.5068016052246094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,2,2,128,1,float16,fp8,0,0.2708832025527954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,2,2,128,1,float16,float16,0,0.31485600471496583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,2,1,128,1,fp8,fp8,0,0.12536799907684326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,2,2,128,1,fp8,fp8,0,0.2737200021743774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,2,1,128,1,float16,float16,0,0.3037775993347168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,2,1,128,1,float16,fp8,0,0.27135519981384276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,2,1,128,1,float16,fp8,0,0.14948159456253052
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,2,1,128,1,fp8,fp8,0,0.27024478912353517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,2,2,128,1,float16,float16,0,0.17250720262527466
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,2,2,128,1,float16,fp8,0,0.14977920055389404
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,2,1,128,1,float16,float16,0,0.16811360120773317
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,2,1,128,1,float16,fp8,0,0.14975359439849853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,2,1,128,1,fp8,fp8,0,0.14721759557723998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,2,2,128,1,float16,float16,0,0.09525439739227295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,2,2,128,1,float16,fp8,0,0.08383039832115173
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,2,2,128,1,fp8,fp8,0,0.08382560014724731
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,2,1,128,1,float16,fp8,0,0.10759520530700684
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,2,1,128,1,float16,float16,0,0.09303359985351563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,2,1,128,1,float16,fp8,0,0.08414720296859741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,2,1,128,1,fp8,fp8,0,0.08414720296859741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,2,2,128,1,float16,float16,0,0.06744800209999084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,2,2,128,1,float16,fp8,0,0.06254400014877319
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,2,2,128,1,fp8,fp8,0,0.06312159895896911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,2,1,128,1,float16,float16,0,0.05912799835205078
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,2,1,128,1,float16,fp8,0,0.05522239804267883
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,2,1,128,1,fp8,fp8,0,0.055504000186920165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,6144,2,1,128,1,float16,float16,0,0.5697328090667725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,2,1,128,1,float16,fp8,0,0.492244815826416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,2,1,128,1,float16,float16,0,0.5533055782318115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,4096,2,1,128,1,fp8,fp8,0,0.4970911979675293
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,2,2,128,1,float16,fp8,0,0.2635567903518677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,2,2,128,1,fp8,fp8,0,0.2665424108505249
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,2,2,128,1,float16,float16,0,0.3011775970458984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,2,2,128,1,float16,float16,0,0.16895840167999268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,2,1,128,1,fp8,fp8,0,0.262988805770874
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,2,2,128,1,float16,fp8,0,0.14619359970092774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,2,2,128,1,fp8,fp8,0,0.14751360416412354
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,2,1,128,1,float16,float16,0,0.16251360177993773
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,2,1,128,1,float16,fp8,0,0.14563839435577391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,2,1,128,1,fp8,fp8,0,0.14466880559921264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,2,2,128,1,float16,float16,0,0.10259360074996948
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,2,2,128,1,float16,fp8,0,0.08716800212860107
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,2,1,128,1,float16,float16,0,0.09756479859352112
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,2,1,128,1,float16,fp8,0,0.0877568006515503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,2,1,128,1,fp8,fp8,0,0.08792160153388977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,2,1,128,1,fp8,fp8,0,0.21121599674224853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,2,2,128,1,float16,float16,0,0.06428319811820984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,2,2,128,1,float16,fp8,0,0.05905600190162659
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,2,2,128,1,fp8,fp8,0,0.059520000219345094
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,2,1,128,1,float16,float16,0,0.06408159732818604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,2,2,128,1,float16,float16,0,0.04278239905834198
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,2,1,128,1,float16,float16,0,0.11268479824066162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,2,2,128,1,float16,fp8,0,0.040275201201438904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,2,1,128,1,float16,float16,0,0.04275520145893097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,2,2,128,1,fp8,fp8,0,0.04043999910354614
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,2,1,128,1,float16,fp8,0,0.04035040140151978
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,2,1,128,1,fp8,fp8,0,0.04015200138092041
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,2,1,128,1,float16,float16,0,0.34232320785522463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,2,1,128,1,float16,fp8,0,0.3116127967834473
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,2,1,128,1,float16,float16,0,0.29565279483795165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,2,2,128,1,float16,float16,0,0.19589279890060424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,3072,2,1,128,1,fp8,fp8,0,0.3117343902587891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,2,1,128,1,float16,fp8,0,0.2617727994918823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,2,2,128,1,float16,fp8,0,0.17331199645996093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,2,1,128,1,float16,float16,0,0.1866976022720337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,2,1,128,1,float16,fp8,0,0.16868799924850464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,2,1,128,1,fp8,fp8,0,0.16943999528884887
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,2,2,128,1,float16,fp8,0,0.0985040009021759
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,2,2,128,1,fp8,fp8,0,0.09905599951744079
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,2,2,128,1,fp8,fp8,0,0.14742720127105713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,2,1,128,1,float16,float16,0,0.10734399557113647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,2,2,128,1,fp8,fp8,0,0.08684639930725098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,2,2,128,1,float16,float16,0,0.06494879722595215
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,2,1,128,1,fp8,fp8,0,0.09677119851112366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,2,2,128,1,float16,fp8,0,0.059329599142074585
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,2,1,128,1,float16,float16,0,0.06282240152359009
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,2,1,128,1,float16,fp8,0,0.05830240249633789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,2,1,128,1,fp8,fp8,0,0.05834400057792664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,2,1,128,1,float16,fp8,0,0.05926399827003479
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,2,2,128,1,float16,fp8,0,0.04719519913196564
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,2,1,128,1,fp8,fp8,0,0.05969759821891785
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,2,2,128,1,fp8,fp8,0,0.046851199865341184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,2,1,128,1,float16,float16,0,0.04651199877262115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,2,1,128,1,float16,fp8,0,0.04482559859752655
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,2,2,128,1,float16,float16,0,0.029756799340248108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,2,1,128,1,fp8,fp8,0,0.04490239918231964
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,2,2,128,1,float16,fp8,0,0.029067200422286988
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,2,2,128,1,fp8,fp8,0,0.02906399965286255
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,2,1,128,1,float16,float16,0,0.029595199227333068
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,2,1,128,1,float16,fp8,0,0.02901119887828827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,2,1,128,1,fp8,fp8,0,0.02879520058631897
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,2,1,128,1,float16,fp8,0,0.3187376022338867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,2,1,128,1,fp8,fp8,0,0.31870241165161134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,2,2,128,1,float16,fp8,0,0.1739375948905945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,2,2,128,1,fp8,fp8,0,0.17380160093307495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,2,1,128,1,float16,float16,0,0.1873568058013916
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,2,1,128,1,float16,fp8,0,0.17047840356826782
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,2,1,128,1,float16,fp8,0,0.09660639762878417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,2,1,128,1,fp8,fp8,0,0.1703968048095703
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,2,2,128,1,float16,float16,0,0.11083840131759644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,2,2,128,1,float16,fp8,0,0.09672639966011047
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,2,2,128,1,fp8,fp8,0,0.09731360077857971
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,2,2,128,1,float16,float16,0,0.049351999163627626
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,2,1,128,1,float16,float16,0,0.10479199886322021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,2,1,128,1,float16,fp8,0,0.09407039880752563
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,2,2,128,1,float16,float16,0,0.06743360161781312
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,2,2,128,1,float16,fp8,0,0.0579472005367279
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,2,2,128,1,fp8,fp8,0,0.05797920227050781
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,2,1,128,1,float16,float16,0,0.06285120248794555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,2,1,128,1,float16,fp8,0,0.058387202024459836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,2048,2,1,128,1,float16,float16,0,0.345579195022583
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,2,1,128,1,fp8,fp8,0,0.05823519825935364
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,2,2,128,1,float16,float16,0,0.03922559916973114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,2,2,128,1,float16,float16,0,0.19794080257415772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,2,1,128,1,float16,float16,0,0.03938399851322174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,2,2,128,1,fp8,fp8,0,0.17274080514907836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,2,1,128,1,float16,fp8,0,0.03655360043048859
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,2,1,128,1,fp8,fp8,0,0.03645919859409332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,2,2,128,1,float16,float16,0,0.11125919818878174
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,2,2,128,1,float16,fp8,0,0.029836800694465638
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,2,1,128,1,float16,float16,0,0.030619201064109803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,2,2,128,1,fp8,fp8,0,0.029790401458740234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,2,1,128,1,float16,fp8,0,0.030049601197242738
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,2,1,128,1,fp8,fp8,0,0.029934400320053102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,2,2,128,1,float16,float16,0,0.021726399660110474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,2,2,128,1,float16,fp8,0,0.02210240066051483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,2,2,128,1,fp8,fp8,0,0.022129599750041962
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,2,1,128,1,float16,float16,0,0.021454399824142455
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,2,1,128,1,float16,fp8,0,0.021990400552749634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,2,2,128,1,fp8,fp8,0,0.05987679958343506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,2,1,128,1,fp8,fp8,0,0.022067199647426605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,2,1,128,1,float16,float16,0,0.2267280101776123
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,2,1,128,1,fp8,fp8,0,0.21175999641418458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,2,2,128,1,float16,float16,0,0.13229600191116334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,2,2,128,1,float16,fp8,0,0.11829440593719483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,2,2,128,1,fp8,fp8,0,0.11727839708328247
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,2,1,128,1,float16,float16,0,0.12409759759902954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,2,1,128,1,float16,fp8,0,0.1148527979850769
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,2,2,128,1,float16,float16,0,0.07666879892349243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,2,1,128,1,fp8,fp8,0,0.11485600471496582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,2,2,128,1,float16,fp8,0,0.036478400230407715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,2,2,128,1,fp8,fp8,0,0.06743040084838867
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,2,2,128,1,fp8,fp8,0,0.03660320043563843
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,2,1,128,1,float16,float16,0,0.0714847981929779
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,2,1,128,1,float16,fp8,0,0.06558240056037903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,2,2,128,1,float16,float16,0,0.045372799038887024
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,2,2,128,1,fp8,fp8,0,0.04284639954566956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,2,2,128,1,float16,float16,0,0.030795198678970338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,2,1,128,1,float16,float16,0,0.04391199946403503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,2,1,128,1,float16,fp8,0,0.04284160137176514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,2,2,128,1,float16,float16,0,0.03743360042572021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,2,2,128,1,float16,fp8,0,0.03691999912261963
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,2,2,128,1,fp8,fp8,0,0.03693920075893402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,2,1,128,1,float16,float16,0,0.03707199990749359
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,2,1,128,1,float16,fp8,0,0.036769598722457886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,2,1,128,1,fp8,fp8,0,0.03685440123081207
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,2,2,128,1,float16,float16,0,0.0244159996509552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,2,2,128,1,float16,fp8,0,0.02380319982767105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,2,2,128,1,fp8,fp8,0,0.023809599876403808
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,2,1,128,1,float16,float16,0,0.0230880007147789
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,2,1,128,1,float16,fp8,0,0.023875199258327484
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,2,2,128,1,float16,float16,0,0.019832000136375427
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,2,2,128,1,float16,fp8,0,0.01937599927186966
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1536,2,1,128,1,float16,fp8,0,0.2119983911514282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,2,2,128,1,fp8,fp8,0,0.01932799965143204
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,2,1,128,1,float16,fp8,0,0.019172799587249757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,2,1,128,1,float16,float16,0,0.019696000218391418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,2,1,128,1,fp8,fp8,0,0.019099199771881105
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,2,1,128,1,float16,float16,0,0.24432640075683593
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,2,1,128,1,float16,fp8,0,0.23300480842590332
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,2,2,128,1,float16,fp8,0,0.06784160137176513
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1024,2,1,128,1,fp8,fp8,0,0.2329472064971924
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,2,2,128,1,fp8,fp8,0,0.12912479639053345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,2,2,128,1,float16,fp8,0,0.04266720116138458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,2,1,128,1,float16,float16,0,0.13125280141830445
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,2,1,128,1,fp8,fp8,0,0.04282560050487518
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,2,1,128,1,float16,fp8,0,0.12395360469818115
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,2,1,128,1,fp8,fp8,0,0.12427040338516235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,2,2,128,1,float16,float16,0,0.08011839985847473
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,2,2,128,1,fp8,fp8,0,0.0729695975780487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,2,1,128,1,float16,float16,0,0.07413920164108276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,2,1,128,1,float16,fp8,0,0.070004802942276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,2,1,128,1,fp8,fp8,0,0.023713600635528565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,2,2,128,1,float16,fp8,0,0.04288319945335388
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,2,1,128,1,fp8,fp8,0,0.09465280175209045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,2,2,128,1,fp8,fp8,0,0.043119999766349795
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,2,1,128,1,float16,fp8,0,0.042710399627685545
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,2,1,128,1,float16,float16,0,0.04409759938716888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,2,1,128,1,fp8,fp8,0,0.042868798971176146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,2,2,128,1,float16,float16,0,0.026915198564529418
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,2,2,128,1,float16,fp8,0,0.026206400990486146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,2,2,128,1,fp8,fp8,0,0.026046401262283324
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,2,1,128,1,float16,float16,0,0.026895999908447266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,2,1,128,1,float16,fp8,0,0.026132801175117494
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,2,1,128,1,fp8,fp8,0,0.025953599810600282
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,2,2,128,1,float16,float16,0,0.02468799948692322
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,2,2,128,1,float16,fp8,0,0.024897600710391998
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,2,2,128,1,fp8,fp8,0,0.024935999512672426
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,2,1,128,1,float16,float16,0,0.02444639950990677
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,2,1,128,1,float16,fp8,0,0.02473759949207306
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,2,2,128,1,float16,float16,0,0.14242080450057984
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,2,1,128,1,fp8,fp8,0,0.02473919987678528
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,2,2,128,1,float16,float16,0,0.01996160000562668
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,2,2,128,1,float16,fp8,0,0.02000479996204376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,2,1,128,1,float16,float16,0,0.01992480009794235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,2,2,128,1,fp8,fp8,0,0.0200095996260643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,2,1,128,1,float16,fp8,0,0.019791999459266664
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,2,1,128,1,fp8,fp8,0,0.019952000677585603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,2,2,128,1,float16,fp8,0,0.12959680557250977
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,2,2,128,1,float16,float16,0,0.017564800381660462
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,2,2,128,1,fp8,fp8,0,0.018607999384403228
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,2,1,128,1,fp8,fp8,0,0.06502559781074524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,2,1,128,1,float16,float16,0,0.0172992005944252
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,2,1,128,1,fp8,fp8,0,0.01852000057697296
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,2,1,128,1,float16,fp8,0,0.018481600284576415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,2,1,128,1,float16,float16,0,0.16863199472427368
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,2,2,128,1,float16,float16,0,0.10400160551071166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,2,1,128,1,fp8,fp8,0,0.16834720373153686
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,2,2,128,1,float16,fp8,0,0.09644160270690919
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,2,2,128,1,fp8,fp8,0,0.09665279984474182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,2,1,128,1,float16,float16,0,0.0918287992477417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,2,2,128,1,float16,fp8,0,0.07251359820365906
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,2,1,128,1,float16,fp8,0,0.09148160219192505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,2,1,128,1,fp8,fp8,0,0.09161919951438904
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,2,2,128,1,fp8,fp8,0,0.05481439828872681
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,2,2,128,1,float16,fp8,0,0.05491200089454651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,2,1,128,1,float16,float16,0,0.05371040105819702
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,2,1,128,1,float16,fp8,0,0.05125759840011597
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,2,1,128,1,fp8,fp8,0,0.06944640278816223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,2,2,128,1,float16,float16,0,0.03629119992256165
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,2,2,128,1,float16,fp8,0,0.032313600182533264
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,2,2,128,1,float16,float16,0,0.048630398511886594
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,2,2,128,1,fp8,fp8,0,0.0324319988489151
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,2,1,128,1,float16,float16,0,0.03123359978199005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,2,1,128,1,fp8,fp8,0,0.031697601079940796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,2,2,128,1,float16,fp8,0,0.02016319930553436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,2,2,128,1,fp8,fp8,0,0.020260800421237946
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,2,1,128,1,float16,fp8,0,0.019942399859428406
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,2,1,128,1,fp8,fp8,0,0.019867199659347533
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,2,2,128,1,float16,float16,0,0.017972800135612487
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,2,2,128,1,float16,fp8,0,0.01844480037689209
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,2,2,128,1,fp8,fp8,0,0.018486399948596955
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,2,1,128,1,float16,float16,0,0.01785600036382675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,2,1,128,1,float16,fp8,0,0.018320000171661376
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,2,1,128,1,fp8,fp8,0,0.01835999935865402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,2,2,128,1,float16,float16,0,0.01733600050210953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,2,2,128,1,float16,fp8,0,0.017584000527858735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,2,2,128,1,fp8,fp8,0,0.017694400250911714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,2,1,128,1,float16,float16,0,0.017022399604320525
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,2,2,128,1,float16,fp8,0,0.018129600584506987
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,2,1,128,1,float16,fp8,0,0.01748799979686737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,2,1,128,1,fp8,fp8,0,0.017403200268745422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,2,2,128,1,float16,float16,0,0.01644960045814514
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,2,2,128,1,float16,fp8,0,0.01733119934797287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,2,2,128,1,fp8,fp8,0,0.017259199917316437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,2,1,128,1,float16,float16,0,0.016531200706958772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,2,1,128,1,float16,fp8,0,0.01706240028142929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,2,1,128,1,fp8,fp8,0,0.017217600345611574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,2,2,128,1,float16,float16,0,0.016249600052833556
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,512,2,1,128,1,float16,fp8,0,0.16921119689941405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,2,2,128,1,float16,fp8,0,0.017156800627708434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,2,2,128,1,fp8,fp8,0,0.017092800140380858
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,2,1,128,1,float16,fp8,0,0.016944000124931337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,2,1,128,1,fp8,fp8,0,0.01693280041217804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,2,1,128,1,float16,float16,0,0.0161423996090889
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,2,1,128,1,float16,float16,0,0.08176640272140503
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,2,1,128,1,fp8,fp8,0,0.08271999955177307
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,2,2,128,1,float16,float16,0,0.059969598054885866
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,2,2,128,1,float16,float16,0,0.05424000024795532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,2,2,128,1,float16,fp8,0,0.04848639965057373
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,2,2,128,1,fp8,fp8,0,0.04787200093269348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,2,1,128,1,float16,float16,0,0.04645119905471802
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,2,1,128,1,float16,fp8,0,0.04612640142440796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,2,1,128,1,fp8,fp8,0,0.04610080122947693
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,2,2,128,1,float16,fp8,0,0.02824319899082184
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,2,2,128,1,fp8,fp8,0,0.0281792014837265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,2,1,128,1,float16,float16,0,0.026067200303077697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,2,1,128,1,float16,fp8,0,0.02771199941635132
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,2,1,128,1,fp8,fp8,0,0.027750399708747864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,2,1,128,1,float16,fp8,0,0.03141280114650726
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,2,2,128,1,float16,fp8,0,0.0178384006023407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,2,2,128,1,float16,float16,0,0.01964640021324158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,2,2,128,1,fp8,fp8,0,0.01775680035352707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,2,1,128,1,float16,float16,0,0.017083199322223665
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,2,1,128,1,float16,fp8,0,0.017324799299240114
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,2,2,128,1,float16,float16,0,0.015092800557613372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,2,2,128,1,float16,fp8,0,0.015624000132083893
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,2,1,128,1,float16,float16,0,0.015011200308799743
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,2,1,128,1,float16,fp8,0,0.015343999862670899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,2,1,128,1,fp8,fp8,0,0.015440000593662262
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,2,2,128,1,float16,float16,0,0.014640000462532044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,2,2,128,1,float16,fp8,0,0.015036800503730774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,2,2,128,1,fp8,fp8,0,0.014920000731945039
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,2,1,128,1,float16,float16,0,0.01387999951839447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,2,1,128,1,float16,fp8,0,0.014350399374961853
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,2,1,128,1,fp8,fp8,0,0.014361600577831268
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,2,2,128,1,float16,float16,0,0.013548800349235534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,2,2,128,1,float16,fp8,0,0.014022399485111237
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,2,2,128,1,fp8,fp8,0,0.01406400054693222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,2,1,128,1,float16,float16,0,0.013436800241470337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,2,1,128,1,float16,fp8,0,0.013820800185203552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,256,2,1,128,1,float16,fp8,0,0.08271359801292419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,2,1,128,1,fp8,fp8,0,0.01372160017490387
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,2,2,128,1,float16,float16,0,0.013238400220870972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,2,2,128,1,float16,fp8,0,0.013820800185203552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,2,2,128,1,fp8,fp8,0,0.013782399892807006
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,2,1,128,1,fp8,fp8,0,0.05135999917984009
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,2,1,128,1,float16,float16,0,0.013265599310398103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,2,1,128,1,float16,fp8,0,0.013838399946689606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,2,2,128,1,float16,float16,0,0.017401599884033205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,2,2,128,1,float16,float16,0,0.013199999928474426
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,2,2,128,1,float16,fp8,0,0.013887999951839447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,2,2,128,1,fp8,fp8,0,0.013872000575065612
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,2,1,128,1,float16,float16,0,0.012980799376964568
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,2,1,128,1,float16,fp8,0,0.013739199936389923
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,2,1,128,1,float16,float16,0,0.019462400674819948
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,2,1,128,1,fp8,fp8,0,0.013579200208187103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,2,1,128,1,float16,float16,0,0.04472959935665131
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,2,1,128,1,float16,fp8,0,0.042936000227928164
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,2,2,128,1,float16,float16,0,0.029555198550224305
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,2,2,128,1,float16,fp8,0,0.024465599656105043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,2,1,128,1,float16,float16,0,0.024267199635505676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,2,1,128,1,float16,fp8,0,0.024350400269031524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,2,1,128,1,fp8,fp8,0,0.024414399266242982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,2,2,128,1,float16,float16,0,0.016646400094032288
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,2,2,128,1,float16,fp8,0,0.016678400337696075
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,2,2,128,1,fp8,fp8,0,0.016303999722003935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,2,1,128,1,float16,float16,0,0.01627040058374405
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,2,1,128,1,float16,fp8,0,0.01650079935789108
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,2,1,128,1,fp8,fp8,0,0.016575999557971954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,2,2,128,1,float16,float16,0,0.01401280015707016
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,2,2,128,1,float16,fp8,0,0.01404159963130951
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,2,2,128,1,fp8,fp8,0,0.01404000073671341
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,2,1,128,1,float16,float16,0,0.013676799833774567
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,2,1,128,1,float16,fp8,0,0.013808000087738036
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,2,1,128,1,fp8,fp8,0,0.013787199556827546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,2,2,128,1,float16,float16,0,0.01316480040550232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,2,2,128,1,float16,fp8,0,0.012987199425697326
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,2,2,128,1,fp8,fp8,0,0.012580800056457519
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,2,1,128,1,float16,float16,0,0.012870399653911591
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,2,1,128,1,float16,fp8,0,0.01321599930524826
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,2,2,128,1,float16,float16,0,0.031204798817634584
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,2,1,128,1,fp8,fp8,0,0.013089600205421447
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,2,2,128,1,float16,float16,0,0.01244639977812767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,2,2,128,1,float16,fp8,0,0.012495999783277511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,2,2,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,2,1,128,1,float16,float16,0,0.012252800166606903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,2,1,128,1,float16,fp8,0,0.012390399724245072
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,2,1,128,1,fp8,fp8,0,0.012348800152540206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,2,2,128,1,float16,float16,0,0.011739200353622437
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,2,2,128,1,float16,fp8,0,0.011910399794578553
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,2,2,128,1,fp8,fp8,0,0.01199359968304634
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,2,1,128,1,float16,float16,0,0.011635199934244157
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,2,1,128,1,fp8,fp8,0,0.01719360053539276
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,2,1,128,1,float16,fp8,0,0.012036799639463424
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,2,1,128,1,fp8,fp8,0,0.011841599643230439
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,2,2,128,1,fp8,fp8,0,0.015505599975585937
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,128,2,1,128,1,fp8,fp8,0,0.04272800087928772
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,2,2,128,1,float16,float16,0,0.011547199636697768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,2,2,128,1,fp8,fp8,0,0.02481440007686615
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,2,1,128,1,float16,fp8,0,0.011937599629163742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,2,2,128,1,float16,float16,0,0.011468800157308579
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,2,2,128,1,float16,fp8,0,0.011959999799728394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,2,2,128,1,fp8,fp8,0,0.011828800290822982
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,2,1,128,1,float16,float16,0,0.011694400012493134
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,2,1,128,1,float16,fp8,0,0.011737599968910217
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,2,1,128,1,fp8,fp8,0,0.011900799721479416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,2,1,128,1,float16,float16,0,0.026368001103401185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,2,1,128,1,float16,fp8,0,0.0268528014421463
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,64,2,1,128,1,fp8,fp8,0,0.027236801385879517
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,2,2,128,1,float16,float16,0,0.01805119961500168
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,2,2,128,1,float16,fp8,0,0.01763039976358414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,2,2,128,1,fp8,fp8,0,0.018025599420070648
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,2,1,128,1,float16,float16,0,0.016891199350357055
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,2,1,128,1,float16,fp8,0,0.01764640063047409
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,2,1,128,1,fp8,fp8,0,0.01712000072002411
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,2,2,128,1,float16,float16,0,0.012579199671745301
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,2,2,128,1,float16,fp8,0,0.012943999469280243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,2,1,128,1,float16,float16,0,0.014260800182819366
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,2,1,128,1,float16,fp8,0,0.014584000408649444
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,2,1,128,1,fp8,fp8,0,0.01499360054731369
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,2,2,128,1,float16,fp8,0,0.010980799794197083
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,2,2,128,1,float16,float16,0,0.06653760075569153
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,2,2,128,1,fp8,fp8,0,0.010974399745464325
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,2,1,128,1,float16,float16,0,0.012720000743865967
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,2,1,128,1,float16,fp8,0,0.013044799864292144
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,2,2,128,1,float16,float16,0,0.010304000228643417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,2,2,128,1,float16,fp8,0,0.010315199941396713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,2,2,128,1,fp8,fp8,0,0.010132800042629241
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,2,1,128,1,float16,float16,0,0.011924800276756287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,2,1,128,1,float16,fp8,0,0.012244799733161926
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,2,1,128,1,fp8,fp8,0,0.01218079999089241
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,2,2,128,1,float16,float16,0,0.009998399764299393
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,2,2,128,1,float16,fp8,0,0.00968960002064705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,2,2,128,1,fp8,fp8,0,0.009598399698734283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,2,1,128,1,fp8,fp8,0,0.013864000141620637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,2,1,128,1,float16,float16,0,0.011726400256156922
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,2,1,128,1,float16,fp8,0,0.012041600048542022
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,2,1,128,1,fp8,fp8,0,0.01228640004992485
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,2,2,128,1,float16,float16,0,0.009841600060462951
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,2,2,128,1,float16,fp8,0,0.009721600264310837
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,2,2,128,1,fp8,fp8,0,0.009564799815416336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,2,1,128,1,float16,float16,0,0.01154239997267723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,2,2,128,1,float16,fp8,0,0.011908800154924393
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,2,1,128,1,float16,float16,0,0.011531200259923935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,2,1,128,1,fp8,fp8,0,0.011774399876594543
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,2,2,128,1,float16,float16,0,0.009699200093746186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,2,2,128,1,fp8,fp8,0,0.009404800087213516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,2,1,128,1,float16,float16,0,0.01146719977259636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,2,1,128,1,float16,fp8,0,0.011776000261306763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,2,1,128,1,fp8,fp8,0,0.01175680011510849
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,2,2,128,1,float16,float16,0,0.00984480008482933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,2,2,128,1,float16,fp8,0,0.009622400254011154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,2,2,128,1,fp8,fp8,0,0.009622400254011154
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,2,1,128,1,float16,float16,0,0.011486399918794632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,2,2,128,1,fp8,fp8,0,0.012219200283288956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,2,1,128,1,float16,fp8,0,0.011860799789428712
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,2,1,128,1,fp8,fp8,0,0.01170559972524643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,2,1,128,1,float16,float16,0,0.020577600598335265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,2,1,128,1,float16,fp8,0,0.020556800067424774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,2,2,128,1,float16,float16,0,0.016607999801635742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,2,2,128,1,float16,fp8,0,0.016414399445056915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,2,1,128,1,fp8,fp8,0,0.012982399761676788
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,2,2,128,1,fp8,fp8,0,0.016339200735092162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,2,1,128,1,float16,float16,0,0.014183999598026275
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,2,1,128,1,float16,fp8,0,0.013655999302864074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,2,1,128,1,fp8,fp8,0,0.01395840048789978
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,2,2,128,1,float16,float16,0,0.012038400024175644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,2,2,128,1,float16,fp8,0,0.011478400230407715
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,2,2,128,1,fp8,fp8,0,0.011825600266456604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,2,1,128,1,float16,float16,0,0.011638399958610535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,2,1,128,1,float16,fp8,0,0.012153600156307221
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,2,1,128,1,fp8,fp8,0,0.011456000059843064
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,2,2,128,1,float16,float16,0,0.011406400054693223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,2,2,128,1,float16,fp8,0,0.010768000036478043
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,2,2,128,1,fp8,fp8,0,0.01125440001487732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,2,1,128,1,float16,fp8,0,0.010876800119876861
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,2,1,128,1,fp8,fp8,0,0.010892800241708755
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,2,2,128,1,float16,float16,0,0.010225600004196167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,2,2,128,1,float16,fp8,0,0.010054399818181991
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,2,2,128,1,fp8,fp8,0,0.010047999769449234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,2,1,128,1,float16,float16,0,0.010014399886131287
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,2,1,128,1,float16,fp8,0,0.011825600266456604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,2,1,128,1,float16,fp8,0,0.009982399642467499
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,2,1,128,1,fp8,fp8,0,0.011825600266456604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,2,2,128,1,float16,float16,0,0.010156799852848054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,2,2,128,1,float16,fp8,0,0.00998080000281334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,2,1,128,1,float16,float16,0,0.009964799880981446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,2,1,128,1,float16,fp8,0,0.009737599641084671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,2,1,128,1,fp8,fp8,0,0.009576000273227692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,2,2,128,1,float16,float16,0,0.009673599898815156
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,2,2,128,1,float16,fp8,0,0.009329599887132644
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,2,2,128,1,fp8,fp8,0,0.009508799761533737
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,2,1,128,1,float16,float16,0,0.009787199646234512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,2,1,128,1,float16,fp8,0,0.009436800330877303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,2,1,128,1,fp8,fp8,0,0.009340800344944
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,2,2,128,1,float16,float16,0,0.009571199864149093
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,2,2,128,1,float16,fp8,0,0.009361600130796432
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,32,2,1,128,1,fp8,fp8,0,0.020475199818611144
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,2,2,128,1,fp8,fp8,0,0.00936639979481697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,2,1,128,1,float16,fp8,0,0.009312000125646591
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,2,1,128,1,fp8,fp8,0,0.009513600170612336
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,2,2,128,1,float16,float16,0,0.009788800030946732
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,2,2,128,1,float16,fp8,0,0.009534399956464767
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,2,2,128,1,fp8,fp8,0,0.009515199810266495
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,2,1,128,1,float16,float16,0,0.009799999743700027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,2,1,128,1,float16,fp8,0,0.009331200271844864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,2,1,128,1,fp8,fp8,0,0.009563200175762177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,2,1,128,1,float16,float16,0,0.010692799836397171
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,2,1,128,1,float16,fp8,0,0.01908479928970337
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,2,1,128,1,fp8,fp8,0,0.018875199556350707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,2,2,128,1,float16,float16,0,0.015982399880886077
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,2,2,128,1,float16,fp8,0,0.01555200070142746
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,2,2,128,1,fp8,fp8,0,0.015673600137233734
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,2,1,128,1,float16,float16,0,0.013187199831008911
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,2,2,128,1,fp8,fp8,0,0.011827199906110763
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,2,1,128,1,float16,fp8,0,0.013193599879741669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,2,1,128,1,fp8,fp8,0,0.013084800541400909
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,2,1,128,1,fp8,fp8,0,0.009964799880981446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,2,2,128,1,float16,fp8,0,0.01186719983816147
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,2,2,128,1,fp8,fp8,0,0.011534400284290314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,2,2,128,1,float16,fp8,0,0.009646400064229965
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,2,1,128,1,float16,fp8,0,0.01165120005607605
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,2,1,128,1,fp8,fp8,0,0.011407999694347382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,2,2,128,1,float16,float16,0,0.010728000104427338
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,2,1,128,1,float16,float16,0,0.01072160005569458
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,2,1,128,1,float16,fp8,0,0.010639999806880952
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,2,1,128,1,fp8,fp8,0,0.010632000118494033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,2,2,128,1,float16,float16,0,0.009974399954080582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,2,2,128,1,float16,fp8,0,0.00997920036315918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,2,2,128,1,fp8,fp8,0,0.010016000270843506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,2,1,128,1,float16,float16,0,0.009958399832248688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,2,1,128,1,float16,fp8,0,0.009732799977064133
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,2,1,128,1,fp8,fp8,0,0.00958240032196045
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,2,2,128,1,float16,float16,0,0.00997920036315918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,2,1,128,1,float16,float16,0,0.009710399806499482
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,2,2,128,1,float16,fp8,0,0.009934400022029877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,2,2,128,1,fp8,fp8,0,0.009918399900197983
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,2,1,128,1,float16,fp8,0,0.009948799759149552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,2,1,128,1,fp8,fp8,0,0.010028800368309021
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,2,2,128,1,float16,float16,0,0.009833600372076035
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,16,2,1,128,1,float16,float16,0,0.01876640021800995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,2,2,128,1,float16,fp8,0,0.00939679965376854
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,2,2,128,1,fp8,fp8,0,0.009404800087213516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,2,1,128,1,float16,float16,0,0.009497600048780442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,2,1,128,1,float16,fp8,0,0.009436800330877303
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,2,1,128,1,fp8,fp8,0,0.00950239971280098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,2,2,128,1,float16,float16,0,0.00968639999628067
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,2,2,128,1,float16,fp8,0,0.009382399916648864
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,2,2,128,1,float16,float16,0,0.011902400106191636
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,2,2,128,1,fp8,fp8,0,0.009310399740934372
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,2,2,128,1,fp8,fp8,0,0.009531199932098389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,2,1,128,1,float16,float16,0,0.009702400118112565
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,2,2,128,1,float16,float16,0,0.009676799923181535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,2,1,128,1,fp8,fp8,0,0.009355200082063675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,2,2,128,1,float16,fp8,0,0.01075040027499199
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,2,2,128,1,float16,fp8,0,0.009430400282144546
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,2,2,128,1,fp8,fp8,0,0.009296000003814697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,2,1,128,1,float16,float16,0,0.009764800220727921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,2,1,128,1,float16,float16,0,0.01791200041770935
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,2,1,128,1,float16,fp8,0,0.017688000202178956
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,256,1,2,1,128,1,fp8,fp8,0,0.017683200538158417
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,2,2,128,1,float16,float16,0,0.01544640064239502
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,2,2,128,1,float16,fp8,0,0.015435199439525604
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,2,2,128,1,fp8,fp8,0,0.015484799444675446
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,2,1,128,1,float16,float16,0,0.013012799620628356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,2,1,128,1,float16,fp8,0,0.012912000715732574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,2,1,128,1,fp8,fp8,0,0.01295199990272522
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,2,2,128,1,float16,float16,0,0.011190400272607804
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,2,1,128,1,float16,float16,0,0.010102400183677673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,2,2,128,1,float16,fp8,0,0.011179199814796448
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,2,2,128,1,fp8,fp8,0,0.011204800009727478
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,2,1,128,1,float16,float16,0,0.01103999987244606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,2,1,128,1,float16,fp8,0,0.011310400068759918
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,2,1,128,1,fp8,fp8,0,0.011004800349473954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,2,2,128,1,float16,float16,0,0.010744000226259232
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,2,2,128,1,float16,fp8,0,0.010694400221109391
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,2,2,128,1,fp8,fp8,0,0.010564800351858139
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,2,1,128,1,float16,float16,0,0.010644800215959548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,2,1,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,2,1,128,1,fp8,fp8,0,0.010681600123643876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,2,2,128,1,float16,float16,0,0.009988799691200256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,2,2,128,1,float16,fp8,0,0.009787199646234512
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,2,2,128,1,fp8,fp8,0,0.009828799962997436
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,2,1,128,1,float16,float16,0,0.009764800220727921
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,2,1,128,1,float16,fp8,0,0.009566400200128555
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,2,1,128,1,fp8,fp8,0,0.009707199782133103
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,2,2,128,1,float16,float16,0,0.009656000137329101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,2,2,128,1,float16,fp8,0,0.009644799679517747
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,2,2,128,1,fp8,fp8,0,0.009576000273227692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,2,1,128,1,float16,float16,0,0.00990239977836609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,2,1,128,1,float16,fp8,0,0.009679999947547913
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,2,1,128,1,fp8,fp8,0,0.009780800342559815
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,2,2,128,1,float16,float16,0,0.00957920029759407
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,2,2,128,1,float16,fp8,0,0.00960479974746704
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,2,2,128,1,fp8,fp8,0,0.009556800127029419
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,2,1,128,1,float16,float16,0,0.009431999921798707
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,2,1,128,1,float16,fp8,0,0.009459199756383896
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,2,1,128,1,fp8,fp8,0,0.009380800276994705
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,2,2,128,1,float16,float16,0,0.00952960029244423
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,2,2,128,1,float16,fp8,0,0.009185600280761718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,2,2,128,1,fp8,fp8,0,0.00945120006799698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,2,1,128,1,float16,fp8,0,0.009345600008964538
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,2,1,128,1,float16,float16,0,0.009481599926948548
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,2,1,128,1,float16,fp8,0,0.009259200096130371
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,2,2,128,1,float16,float16,0,0.00950239971280098
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,2,2,128,1,float16,fp8,0,0.009273599833250046
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,2,2,128,1,fp8,fp8,0,0.010911999642848969
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,2,1,128,1,float16,float16,0,0.009480000287294389
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,2,1,128,1,float16,fp8,0,0.009404800087213516
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,2,1,128,1,fp8,fp8,0,0.009219200164079667
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,1,1,128,1,float16,fp8,0,0.3913615942001343
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,1,1,128,1,fp8,fp8,0,0.3910928010940552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,1,1,128,1,float16,float16,0,0.22726240158081054
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,1,1,128,1,float16,fp8,0,0.19877599477767943
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16384,1,1,128,1,fp8,fp8,0,0.19887200593948365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,1,1,128,1,float16,float16,0,0.21589601039886475
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,1,1,128,1,float16,fp8,0,0.19307199716567994
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16384,1,1,128,1,fp8,fp8,0,0.19452159404754638
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,1,1,128,1,float16,float16,0,0.31491680145263673
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,1,1,128,1,float16,fp8,0,0.27732000350952146
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,2,1,128,1,float16,float16,0,0.011937599629163742
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,12288,1,1,128,1,fp8,fp8,0,0.27595200538635256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,1,1,128,1,float16,float16,0,0.17485599517822265
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,2,2,128,1,fp8,fp8,0,0.00929120033979416
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,1,1,128,1,float16,fp8,0,0.15469919443130492
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,1,1,128,1,float16,float16,0,0.09947999715805053
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,12288,1,1,128,1,fp8,fp8,0,0.15347039699554443
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,1,1,128,1,float16,fp8,0,0.09039679765701295
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,12288,1,1,128,1,fp8,fp8,0,0.09017760157585145
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,1,1,128,1,float16,float16,0,0.24644479751586915
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,1,1,128,1,float16,float16,0,0.14622559547424316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,1,1,128,1,fp8,fp8,0,0.21842880249023439
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16384,1,1,128,1,float16,float16,0,0.444547176361084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,1,1,128,1,float16,fp8,0,0.12985440492630004
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,10240,1,1,128,1,fp8,fp8,0,0.13039040565490723
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,1,1,128,1,float16,fp8,0,0.07646080255508422
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,1,1,128,1,float16,float16,0,0.29910879135131835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,1,1,128,1,float16,fp8,0,0.2636703968048096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,8192,1,1,128,1,fp8,fp8,0,0.26508479118347167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,1,1,128,1,float16,float16,0,0.1788640022277832
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,1,1,128,1,float16,fp8,0,0.1594320058822632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,8192,1,1,128,1,fp8,fp8,0,0.15895839929580688
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,1,1,128,1,float16,float16,0,0.11634240150451661
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,1,1,128,1,float16,fp8,0,0.10698879957199096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,8192,1,1,128,1,fp8,fp8,0,0.10760320425033569
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,1,1,128,1,float16,float16,0,0.06877279877662659
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,1,1,128,1,float16,fp8,0,0.06516960263252258
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,8192,1,1,128,1,fp8,fp8,0,0.06384320259094238
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,2,1,128,1,fp8,fp8,0,0.009494400024414063
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,2,1,128,1,float16,fp8,0,0.009438399970531464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,1,1,128,1,float16,fp8,0,0.1697535991668701
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,1,1,128,1,float16,float16,0,0.11061279773712158
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,1,1,128,1,fp8,fp8,0,0.16971360445022582
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,1,1,128,1,float16,fp8,0,0.09987679719924927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,6144,1,1,128,1,fp8,fp8,0,0.09963200092315674
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,1,1,128,1,float16,float16,0,0.06878560185432434
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,1,1,128,1,fp8,fp8,0,0.07707840204238892
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,1,1,128,1,fp8,fp8,0,0.0643343985080719
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,1,1,128,1,float16,float16,0,0.04301599860191345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,1,1,128,1,float16,fp8,0,0.040424001216888425
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,6144,1,1,128,1,fp8,fp8,0,0.040720000863075256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,1,1,128,1,float16,float16,0,0.18071520328521729
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,1,1,128,1,float16,fp8,0,0.1622223973274231
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,4096,1,1,128,1,fp8,fp8,0,0.16242239475250245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,1,1,128,1,float16,fp8,0,0.09834079742431641
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,1,1,128,1,fp8,fp8,0,0.0986191987991333
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,1,1,128,1,float16,float16,0,0.06674079895019532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,1,1,128,1,float16,fp8,0,0.06140000224113464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,4096,1,1,128,1,fp8,fp8,0,0.061166399717330934
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,1,1,128,1,float16,float16,0,0.04380159974098206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,1,1,128,1,float16,fp8,0,0.041540798544883725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,4096,1,1,128,1,fp8,fp8,0,0.041428801417350766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,1,1,128,1,float16,float16,0,0.028563201427459717
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,1,1,128,1,float16,fp8,0,0.02795040011405945
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,2,1,128,1,fp8,fp8,0,0.009321600198745728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,1,1,128,1,float16,float16,0,0.11914240121841431
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,10240,1,1,128,1,float16,fp8,0,0.21681599617004393
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,1,1,128,1,float16,float16,0,0.07206400036811829
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,1,1,128,1,fp8,fp8,0,0.10909279584884643
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,10240,1,1,128,1,float16,float16,0,0.08402079939842225
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,6144,1,1,128,1,float16,fp8,0,0.06444640159606933
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,1,1,128,1,float16,fp8,0,0.0684943974018097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,3072,1,1,128,1,fp8,fp8,0,0.0685536026954651
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,1,1,128,1,float16,float16,0,0.05134559869766235
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,1,1,128,1,float16,fp8,0,0.048785600066185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,3072,1,1,128,1,fp8,fp8,0,0.048977598547935486
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,1,1,128,1,float16,fp8,0,0.029996800422668456
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,1,1,128,1,fp8,fp8,0,0.029972800612449647
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,1,1,128,1,float16,float16,0,0.023316800594329834
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,1,1,128,1,float16,fp8,0,0.02301120012998581
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,3072,1,1,128,1,fp8,fp8,0,0.023012800514698027
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,1,1,128,1,float16,float16,0,0.11835039854049682
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,1,1,128,1,float16,fp8,0,0.10815199613571166
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,2048,1,1,128,1,fp8,fp8,0,0.10779680013656616
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,1,1,128,1,float16,float16,0,0.07128159999847412
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,4096,1,1,128,1,float16,float16,0,0.10876319408416749
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,1,1,128,1,float16,fp8,0,0.06467199921607972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,2048,1,1,128,1,fp8,fp8,0,0.06463040113449096
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,1,1,128,1,float16,float16,0,0.04200640022754669
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,1,1,128,1,float16,fp8,0,0.038980799913406375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,1,1,128,1,float16,fp8,0,0.031113600730895995
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,1,1,128,1,fp8,fp8,0,0.031097599864006044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,1,1,128,1,float16,fp8,0,0.022668799757957457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,1,1,128,1,fp8,fp8,0,0.022668799757957457
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,1,1,128,1,float16,float16,0,0.019331200420856474
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,1,1,128,1,float16,fp8,0,0.019555200636386872
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,2048,1,1,128,1,fp8,fp8,0,0.01960480064153671
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,1,1,128,1,float16,float16,0,0.081632000207901
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,1,1,128,1,float16,fp8,0,0.0759935975074768
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1536,1,1,128,1,fp8,fp8,0,0.07565600275993348
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,4096,1,1,128,1,fp8,fp8,0,0.027910399436950683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,1,1,128,1,float16,float16,0,0.049534401297569274
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,1,1,128,1,float16,fp8,0,0.04647839963436127
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,3072,1,1,128,1,float16,fp8,0,0.10944000482559205
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,1,1,128,1,float16,float16,0,0.038771200180053714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,1,1,128,1,float16,fp8,0,0.03896799981594086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1536,1,1,128,1,fp8,fp8,0,0.038576000928878786
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,1,1,128,1,float16,float16,0,0.0251008003950119
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,1,1,128,1,fp8,fp8,0,0.02486239969730377
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,1,1,128,1,float16,float16,0,0.019950400292873382
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,3072,1,1,128,1,float16,float16,0,0.030563199520111085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,1,1,128,1,float16,fp8,0,0.01977760046720505
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1536,1,1,128,1,fp8,fp8,0,0.019465599954128266
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,1,1,128,1,float16,fp8,0,0.01851679980754852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,1,1,128,1,fp8,fp8,0,0.01858399957418442
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,1,1,128,1,float16,float16,0,0.08643519878387451
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,1,1,128,1,float16,fp8,0,0.08363360166549683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,2048,1,1,128,1,fp8,fp8,0,0.0392655998468399
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1024,1,1,128,1,fp8,fp8,0,0.08344320058822632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,2048,1,1,128,1,float16,float16,0,0.0222448006272316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,1,1,128,1,float16,float16,0,0.05167040228843689
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,1,1,128,1,float16,fp8,0,0.04856959879398346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1024,1,1,128,1,fp8,fp8,0,0.04833920001983642
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,1,1,128,1,float16,float16,0,0.029718399047851562
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,1,1,128,1,float16,fp8,0,0.028803199529647827
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1024,1,1,128,1,fp8,fp8,0,0.02887200117111206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,1,1,128,1,float16,float16,0,0.02590720057487488
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,1,1,128,1,float16,fp8,0,0.02650879919528961
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1024,1,1,128,1,fp8,fp8,0,0.026185598969459534
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,1,1,128,1,float16,float16,0,0.020443199574947356
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,1,1,128,1,float16,fp8,0,0.020683200657367708
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1024,1,1,128,1,fp8,fp8,0,0.020553599298000335
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,1,1,128,1,float16,float16,0,0.017854399979114532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,1,1,128,1,float16,fp8,0,0.01852799952030182
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1024,1,1,128,1,fp8,fp8,0,0.018588800728321076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,1,1,128,1,float16,float16,0,0.01672160029411316
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,1,1,128,1,float16,fp8,0,0.01730400025844574
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1024,1,1,128,1,fp8,fp8,0,0.017448000609874725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,1,1,128,1,float16,float16,0,0.06578879952430725
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,1,1,128,1,float16,fp8,0,0.06559680104255676
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,6144,1,1,128,1,float16,float16,0,0.19239360094070435
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,512,1,1,128,1,fp8,fp8,0,0.06568959951400757
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,1,1,128,1,float16,float16,0,0.038852798938751223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,1,1,128,1,float16,fp8,0,0.038099199533462524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,1,1,128,1,float16,float16,0,0.02266719937324524
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,1,1,128,1,float16,fp8,0,0.023080000281333925
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,512,1,1,128,1,fp8,fp8,0,0.02299039959907532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,1,1,128,1,float16,float16,0,0.019177600741386414
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,1,1,128,1,float16,fp8,0,0.019592000544071196
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,512,1,1,128,1,fp8,fp8,0,0.01990399956703186
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,1,1,128,1,float16,float16,0,0.01785600036382675
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1536,1,1,128,1,float16,fp8,0,0.024817599356174468
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,1,1,128,1,fp8,fp8,0,0.018489600718021394
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,1,1,128,1,float16,float16,0,0.01663520038127899
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,1,1,128,1,fp8,fp8,0,0.017500799894332886
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,1,1,128,1,float16,float16,0,0.016128000617027283
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,1,1,128,1,float16,fp8,0,0.017057600617408752
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1536,1,1,128,1,float16,float16,0,0.01759359985589981
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,512,1,1,128,1,fp8,fp8,0,0.017115199565887453
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,1,1,128,1,float16,float16,0,0.01592160016298294
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,1,1,128,1,float16,fp8,0,0.017097599804401398
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,512,1,1,128,1,fp8,fp8,0,0.01687840074300766
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,1,1,128,1,float16,float16,0,0.03438880145549774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,1,1,128,1,float16,fp8,0,0.033537599444389346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,256,1,1,128,1,fp8,fp8,0,0.03378559947013855
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,1,1,128,1,float16,float16,0,0.020652799308300017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,1,1,128,1,float16,fp8,0,0.02102400064468384
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,256,1,1,128,1,fp8,fp8,0,0.020942400395870208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,1,1,128,1,float16,float16,0,0.01666080057621002
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,1,1,128,1,float16,fp8,0,0.01720159947872162
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,256,1,1,128,1,fp8,fp8,0,0.01748320013284683
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,1,1,128,1,float16,float16,0,0.014619199931621552
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,1,1,128,1,float16,fp8,0,0.015396800637245179
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,256,1,1,128,1,fp8,fp8,0,0.01555519998073578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,1,1,128,1,float16,float16,0,0.013952000439167023
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,1,1,128,1,float16,fp8,0,0.014534400403499603
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,256,1,1,128,1,fp8,fp8,0,0.014553600549697876
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,1,1,128,1,float16,fp8,0,0.014006400108337402
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,1,1,128,1,fp8,fp8,0,0.014009599387645722
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,1,1,128,1,float16,float16,0,0.013132800161838532
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,1,1,128,1,float16,fp8,0,0.013798399269580841
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,256,1,1,128,1,fp8,fp8,0,0.013859200477600097
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,1,1,128,1,float16,float16,0,0.012996800243854523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,1,1,128,1,float16,fp8,0,0.013897599279880523
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,256,1,1,128,1,fp8,fp8,0,0.01388320028781891
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,1,1,128,1,float16,float16,0,0.02048960030078888
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,1,1,128,1,float16,fp8,0,0.0204927995800972
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,512,1,1,128,1,fp8,fp8,0,0.03825600147247314
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,128,1,1,128,1,fp8,fp8,0,0.02018879950046539
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1536,1,1,128,1,fp8,fp8,0,0.0466048002243042
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,1,1,128,1,float16,fp8,0,0.015897600352764128
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,1,1,128,1,float16,float16,0,0.015988799929618835
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,128,1,1,128,1,fp8,fp8,0,0.015857599675655365
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,1,1,128,1,float16,fp8,0,0.013841600716114044
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,1,1,128,1,fp8,fp8,0,0.013625599443912506
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,1,1,128,1,float16,fp8,0,0.012643200159072877
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,512,1,1,128,1,float16,fp8,0,0.017584000527858735
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,1,1,128,1,fp8,fp8,0,0.012705600261688233
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,1,1,128,1,float16,fp8,0,0.012161599844694138
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,1,1,128,1,fp8,fp8,0,0.012144000083208085
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,1,1,128,1,float16,float16,0,0.011662399768829346
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,1,1,128,1,float16,fp8,0,0.011791999638080596
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,128,1,1,128,1,fp8,fp8,0,0.011715199798345566
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,1,1,128,1,float16,float16,0,0.011345600336790084
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,1,1,128,1,float16,fp8,0,0.011572799831628799
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,128,1,1,128,1,fp8,fp8,0,0.011556799709796905
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,1,1,128,1,float16,float16,0,0.01148959994316101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,1,1,128,1,float16,fp8,0,0.011811199784278869
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,2048,1,1,128,1,float16,float16,0,0.03168799877166748
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,1,1,128,1,float16,float16,0,0.015095999836921692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,1,1,128,1,float16,fp8,0,0.015044799447059632
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,64,1,1,128,1,fp8,fp8,0,0.015028800070285796
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,1,1,128,1,float16,float16,0,0.01263200044631958
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,1,1,128,1,float16,fp8,0,0.01250240057706833
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,64,1,1,128,1,fp8,fp8,0,0.012334399670362473
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,1,1,128,1,float16,float16,0,0.011308799684047698
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,1,1,128,1,float16,fp8,0,0.011406400054693223
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,64,1,1,128,1,fp8,fp8,0,0.011164800077676774
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,1,1,128,1,float16,float16,0,0.01064319983124733
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,1,1,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,1,1,128,1,float16,float16,0,0.010195200145244599
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,1,1,128,1,float16,fp8,0,0.009939199686050415
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,64,1,1,128,1,fp8,fp8,0,0.009924799948930741
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,1,1,128,1,float16,float16,0,0.009652800112962722
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,1,1,128,1,float16,fp8,0,0.00941760018467903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,64,1,1,128,1,fp8,fp8,0,0.009372799843549728
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,128,1,1,128,1,float16,float16,0,0.013950400054454803
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,1,1,128,1,float16,float16,0,0.00968799963593483
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,1,1,128,1,float16,fp8,0,0.00955360010266304
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,128,1,1,128,1,float16,float16,0,0.012375999987125397
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,1,1,128,1,float16,float16,0,0.00976639986038208
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,1,1,128,1,float16,fp8,0,0.009438399970531464
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,64,1,1,128,1,fp8,fp8,0,0.009523200243711472
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,128,1,1,128,1,float16,float16,0,0.011856000125408172
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,1,1,128,1,float16,float16,0,0.01438080072402954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,1,1,128,1,float16,fp8,0,0.013843199610710144
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,1,1,128,1,float16,float16,0,0.011873599886894227
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,1,1,128,1,float16,fp8,0,0.011820799857378005
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,32,1,1,128,1,fp8,fp8,0,0.011593600362539291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,1,1,128,1,float16,float16,0,0.010678400099277497
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,1,1,128,1,float16,fp8,0,0.010599999874830245
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,32,1,1,128,1,fp8,fp8,0,0.010606399923563003
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,1,1,128,1,float16,float16,0,0.010046400129795074
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,128,1,1,128,1,fp8,fp8,0,0.011854399740695954
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,1,1,128,1,float16,fp8,0,0.009967999905347824
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,32,1,1,128,1,fp8,fp8,0,0.009921599924564362
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,1,1,128,1,float16,float16,0,0.009996800124645234
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,1,1,128,1,float16,fp8,0,0.009892799705266953
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,32,1,1,128,1,fp8,fp8,0,0.009676799923181535
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,1,1,128,1,float16,float16,0,0.00958079993724823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,1,1,128,1,float16,fp8,0,0.00931679978966713
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,256,1,1,128,1,float16,float16,0,0.013116799294948578
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,1,1,128,1,float16,float16,0,0.009784000366926194
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,1,1,128,1,fp8,fp8,0,0.009561599791049957
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,1,1,128,1,float16,float16,0,0.009724800288677216
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,1,1,128,1,float16,fp8,0,0.009468799829483033
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,32,1,1,128,1,fp8,fp8,0,0.00944959968328476
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,1,1,128,1,float16,float16,0,0.012929600477218629
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,1,1,128,1,float16,fp8,0,0.01279039978981018
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,16,1,1,128,1,fp8,fp8,0,0.01282079964876175
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,64,1,1,128,1,float16,fp8,0,0.009891200065612792
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,1,1,128,1,float16,float16,0,0.011552000045776367
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,1,1,128,1,float16,fp8,0,0.011185599863529206
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,16,1,1,128,1,fp8,fp8,0,0.011446399986743927
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,1,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,1,1,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,1,1,128,1,float16,float16,0,0.009937600046396256
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,1,1,128,1,float16,fp8,0,0.009838400036096573
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,16,1,1,128,1,fp8,fp8,0,0.009920000284910201
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,1,1,128,1,float16,float16,0,0.009984000027179718
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,512,1,1,128,1,float16,fp8,0,0.018694399297237395
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,1,1,128,1,fp8,fp8,0,0.009911999851465226
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,1,1,128,1,float16,float16,0,0.009769599884748459
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,1,1,128,1,float16,fp8,0,0.009726399928331375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,16,1,1,128,1,fp8,fp8,0,0.009636799991130828
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,32,1,1,128,1,fp8,fp8,0,0.014187200367450714
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,1,1,128,1,float16,float16,0,0.009627199918031692
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,1,1,128,1,fp8,fp8,0,0.009563200175762177
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,1,1,128,1,float16,float16,0,0.009612800180912017
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,1,1,128,1,float16,fp8,0,0.009467200189828873
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,16,1,1,128,1,fp8,fp8,0,0.009444800019264222
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,1,1,128,1,float16,float16,0,0.012415999919176102
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,1,1,128,1,float16,fp8,0,0.012392000108957291
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,128,1,1,1,128,1,fp8,fp8,0,0.012382400035858155
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,1,1,128,1,float16,float16,0,0.01148959994316101
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,1,1,128,1,float16,fp8,0,0.011539199948310852
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,32,1,1,128,1,fp8,fp8,0,0.009375999867916106
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,64,1,1,1,128,1,fp8,fp8,0,0.01165440008044243
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,1,1,128,1,float16,float16,0,0.010276799649000167
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,32,1,1,128,1,float16,fp8,0,0.009369599819183349
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,1,1,128,1,fp8,fp8,0,0.010260800272226334
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,1,1,128,1,float16,float16,0,0.009900800138711929
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,1,1,128,1,fp8,fp8,0,0.009779199957847595
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,1,1,128,1,float16,float16,0,0.009700799733400345
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,1,1,128,1,float16,fp8,0,0.009723199903964997
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,1,1,1,128,1,fp8,fp8,0,0.009726399928331375
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,1,1,128,1,float16,float16,0,0.009244800359010697
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,1,1,128,1,float16,fp8,0,0.009167999774217606
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,16,1,1,128,1,float16,fp8,0,0.010715200006961823
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,4,1,1,1,128,1,fp8,fp8,0,0.009193599969148637
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,1,1,128,1,float16,fp8,0,0.00931520015001297
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,1,1,128,1,float16,float16,0,0.00963039994239807
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,1,1,1,128,1,fp8,fp8,0,0.009299200028181076
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,1,1,128,1,float16,float16,0,0.009560000151395798
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,1,1,128,1,float16,fp8,0,0.009435199946165086
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,1,1,1,1,128,1,fp8,fp8,0,0.00941760018467903
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,8,16,1,1,128,1,float16,fp8,0,0.00987199991941452
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,16,1,1,128,1,float16,fp8,0,0.009583999961614608
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,32,1,1,1,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,16,1,1,1,128,1,float16,fp8,0,0.009675200283527374
SGLang,0.5.10,NVIDIA H200,context_attention,flash_attention,2,64,1,1,128,1,fp8,fp8,0,0.009665600210428237
